Extend superblock size to 128x128 pixels.
If --enable-ext-partition is used at build time, the superblock size (sometimes also referred to as coding unit (CU) size) is extended to 128x128 pixels. Change-Id: Ie09cec6b7e8d765b7555ff5d80974aab60803f3a
This commit is contained in:
		| @@ -28,7 +28,7 @@ | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| static const unsigned int kMaxDimension = MAX_CU_SIZE; | ||||
| static const unsigned int kMaxDimension = MAX_SB_SIZE; | ||||
|  | ||||
| typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, | ||||
|                              uint8_t *dst, ptrdiff_t dst_stride, | ||||
|   | ||||
| @@ -50,16 +50,16 @@ class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> { | ||||
| TEST_P(MaskedSADTest, OperationCheck) { | ||||
|   unsigned int ref_ret, ret; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = MAX_CU_SIZE; | ||||
|   int ref_stride = MAX_CU_SIZE; | ||||
|   int msk_stride = MAX_CU_SIZE; | ||||
|   int src_stride = MAX_SB_SIZE; | ||||
|   int ref_stride = MAX_SB_SIZE; | ||||
|   int msk_stride = MAX_SB_SIZE; | ||||
|   for (int i = 0; i < number_of_iterations; ++i) { | ||||
|     for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { | ||||
|     for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { | ||||
|       src_ptr[j] = rnd.Rand8(); | ||||
|       ref_ptr[j] = rnd.Rand8(); | ||||
|       msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64; | ||||
| @@ -108,18 +108,18 @@ class HighbdMaskedSADTest : public ::testing:: | ||||
| TEST_P(HighbdMaskedSADTest, OperationCheck) { | ||||
|   unsigned int ref_ret, ret; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); | ||||
|   uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = MAX_CU_SIZE; | ||||
|   int ref_stride = MAX_CU_SIZE; | ||||
|   int msk_stride = MAX_CU_SIZE; | ||||
|   int src_stride = MAX_SB_SIZE; | ||||
|   int ref_stride = MAX_SB_SIZE; | ||||
|   int msk_stride = MAX_SB_SIZE; | ||||
|   for (int i = 0; i < number_of_iterations; ++i) { | ||||
|     for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { | ||||
|     for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { | ||||
|       src_ptr[j] = rnd.Rand16()&0xfff; | ||||
|       ref_ptr[j] = rnd.Rand16()&0xfff; | ||||
|       msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64; | ||||
|   | ||||
| @@ -58,17 +58,17 @@ TEST_P(MaskedVarianceTest, OperationCheck) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = MAX_CU_SIZE; | ||||
|   int ref_stride = MAX_CU_SIZE; | ||||
|   int msk_stride = MAX_CU_SIZE; | ||||
|   int src_stride = MAX_SB_SIZE; | ||||
|   int ref_stride = MAX_SB_SIZE; | ||||
|   int msk_stride = MAX_SB_SIZE; | ||||
|  | ||||
|   for (int i = 0; i < number_of_iterations; ++i) { | ||||
|     for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { | ||||
|     for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { | ||||
|       src_ptr[j] = rnd.Rand8(); | ||||
|       ref_ptr[j] = rnd.Rand8(); | ||||
|       msk_ptr[j] = rnd(65); | ||||
| @@ -100,19 +100,19 @@ TEST_P(MaskedVarianceTest, ExtremeValues) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = MAX_CU_SIZE; | ||||
|   int ref_stride = MAX_CU_SIZE; | ||||
|   int msk_stride = MAX_CU_SIZE; | ||||
|   int src_stride = MAX_SB_SIZE; | ||||
|   int ref_stride = MAX_SB_SIZE; | ||||
|   int msk_stride = MAX_SB_SIZE; | ||||
|  | ||||
|   for (int i = 0; i < 8; ++i) { | ||||
|     memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE); | ||||
|     memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE); | ||||
|     memset(msk_ptr, (i & 0x4) ?  64 : 0, MAX_CU_SIZE*MAX_CU_SIZE); | ||||
|     memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE); | ||||
|     memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE); | ||||
|     memset(msk_ptr, (i & 0x4) ?  64 : 0, MAX_SB_SIZE*MAX_SB_SIZE); | ||||
|  | ||||
|     ref_ret = ref_func_(src_ptr, src_stride, | ||||
|                         ref_ptr, ref_stride, | ||||
| @@ -166,21 +166,21 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = (MAX_CU_SIZE+1); | ||||
|   int ref_stride = (MAX_CU_SIZE+1); | ||||
|   int msk_stride = (MAX_CU_SIZE+1); | ||||
|   int src_stride = (MAX_SB_SIZE+1); | ||||
|   int ref_stride = (MAX_SB_SIZE+1); | ||||
|   int msk_stride = (MAX_SB_SIZE+1); | ||||
|   int xoffset; | ||||
|   int yoffset; | ||||
|  | ||||
|   for (int i = 0; i < number_of_iterations; ++i) { | ||||
|     int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)}; | ||||
|     int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)}; | ||||
|     for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) { | ||||
|     for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) { | ||||
|       src_ptr[j] = rnd.Rand8(); | ||||
|       ref_ptr[j] = rnd.Rand8(); | ||||
|       msk_ptr[j] = rnd(65); | ||||
| @@ -221,23 +221,23 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   int first_failure_x = -1; | ||||
|   int first_failure_y = -1; | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = (MAX_CU_SIZE+1); | ||||
|   int ref_stride = (MAX_CU_SIZE+1); | ||||
|   int msk_stride = (MAX_CU_SIZE+1); | ||||
|   int src_stride = (MAX_SB_SIZE+1); | ||||
|   int ref_stride = (MAX_SB_SIZE+1); | ||||
|   int msk_stride = (MAX_SB_SIZE+1); | ||||
|  | ||||
|   for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) { | ||||
|     for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) { | ||||
|       for (int i = 0; i < 8; ++i) { | ||||
|         memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); | ||||
|         memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); | ||||
|         memset(msk_ptr, (i & 0x4) ?  64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); | ||||
|         memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); | ||||
|         memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); | ||||
|         memset(msk_ptr, (i & 0x4) ?  64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); | ||||
|  | ||||
|         ref_ret = ref_func_(src_ptr, src_stride, | ||||
|                             xoffset, yoffset, | ||||
| @@ -297,19 +297,19 @@ TEST_P(HighbdMaskedVarianceTest, OperationCheck) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); | ||||
|   uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = MAX_CU_SIZE; | ||||
|   int ref_stride = MAX_CU_SIZE; | ||||
|   int msk_stride = MAX_CU_SIZE; | ||||
|   int src_stride = MAX_SB_SIZE; | ||||
|   int ref_stride = MAX_SB_SIZE; | ||||
|   int msk_stride = MAX_SB_SIZE; | ||||
|  | ||||
|   for (int i = 0; i < number_of_iterations; ++i) { | ||||
|     for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { | ||||
|     for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) { | ||||
|       src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); | ||||
|       ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); | ||||
|       msk_ptr[j] = rnd(65); | ||||
| @@ -341,23 +341,23 @@ TEST_P(HighbdMaskedVarianceTest, ExtremeValues) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]); | ||||
|   uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); | ||||
|   uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = MAX_CU_SIZE; | ||||
|   int ref_stride = MAX_CU_SIZE; | ||||
|   int msk_stride = MAX_CU_SIZE; | ||||
|   int src_stride = MAX_SB_SIZE; | ||||
|   int ref_stride = MAX_SB_SIZE; | ||||
|   int msk_stride = MAX_SB_SIZE; | ||||
|  | ||||
|   for (int i = 0; i < 8; ++i) { | ||||
|     vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, | ||||
|                  MAX_CU_SIZE*MAX_CU_SIZE); | ||||
|                  MAX_SB_SIZE*MAX_SB_SIZE); | ||||
|     vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, | ||||
|                  MAX_CU_SIZE*MAX_CU_SIZE); | ||||
|     memset(msk_ptr, (i & 0x4) ?  64 : 0, MAX_CU_SIZE*MAX_CU_SIZE); | ||||
|                  MAX_SB_SIZE*MAX_SB_SIZE); | ||||
|     memset(msk_ptr, (i & 0x4) ?  64 : 0, MAX_SB_SIZE*MAX_SB_SIZE); | ||||
|  | ||||
|     ref_ret = ref_func_(src8_ptr, src_stride, | ||||
|                         ref8_ptr, ref_stride, | ||||
| @@ -407,24 +407,24 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); | ||||
|   uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int first_failure_x = -1; | ||||
|   int first_failure_y = -1; | ||||
|   int src_stride = (MAX_CU_SIZE+1); | ||||
|   int ref_stride = (MAX_CU_SIZE+1); | ||||
|   int msk_stride = (MAX_CU_SIZE+1); | ||||
|   int src_stride = (MAX_SB_SIZE+1); | ||||
|   int ref_stride = (MAX_SB_SIZE+1); | ||||
|   int msk_stride = (MAX_SB_SIZE+1); | ||||
|   int xoffset, yoffset; | ||||
|  | ||||
|   for (int i = 0; i < number_of_iterations; ++i) { | ||||
|     for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { | ||||
|       for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { | ||||
|         for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) { | ||||
|         for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) { | ||||
|           src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); | ||||
|           ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); | ||||
|           msk_ptr[j] = rnd(65); | ||||
| @@ -465,27 +465,27 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) { | ||||
|   unsigned int ref_ret, opt_ret; | ||||
|   unsigned int ref_sse, opt_sse; | ||||
|   ACMRandom rnd(ACMRandom::DeterministicSeed()); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   DECLARE_ALIGNED(16, uint8_t,  msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]); | ||||
|   uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); | ||||
|   uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); | ||||
|   int first_failure_x = -1; | ||||
|   int first_failure_y = -1; | ||||
|   int err_count = 0; | ||||
|   int first_failure = -1; | ||||
|   int src_stride = (MAX_CU_SIZE+1); | ||||
|   int ref_stride = (MAX_CU_SIZE+1); | ||||
|   int msk_stride = (MAX_CU_SIZE+1); | ||||
|   int src_stride = (MAX_SB_SIZE+1); | ||||
|   int ref_stride = (MAX_SB_SIZE+1); | ||||
|   int msk_stride = (MAX_SB_SIZE+1); | ||||
|  | ||||
|   for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) { | ||||
|     for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) { | ||||
|       for (int i = 0; i < 8; ++i) { | ||||
|         vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, | ||||
|                      (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); | ||||
|                      (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); | ||||
|         vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, | ||||
|                      (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); | ||||
|         memset(msk_ptr, (i & 0x4) ?   64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); | ||||
|                      (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); | ||||
|         memset(msk_ptr, (i & 0x4) ?   64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)); | ||||
|  | ||||
|         ref_ret = ref_func_(src8_ptr, src_stride, | ||||
|                             xoffset, yoffset, | ||||
|   | ||||
| @@ -10,13 +10,16 @@ | ||||
| 
 | ||||
| #include "third_party/googletest/src/include/gtest/gtest.h" | ||||
| 
 | ||||
| #include "./vp9_rtcd.h" | ||||
| #include "./vpx_config.h" | ||||
| #include "./vpx_dsp_rtcd.h" | ||||
| #include "test/acm_random.h" | ||||
| #include "test/clear_system_state.h" | ||||
| #include "test/register_state_check.h" | ||||
| #if CONFIG_VP10 | ||||
| #include "vp10/common/blockd.h" | ||||
| #elif CONFIG_VP9 | ||||
| #include "vp9/common/vp9_blockd.h" | ||||
| #endif | ||||
| #include "vpx_mem/vpx_mem.h" | ||||
| 
 | ||||
| typedef void (*SubtractFunc)(int rows, int cols, | ||||
| @@ -24,7 +27,7 @@ typedef void (*SubtractFunc)(int rows, int cols, | ||||
|                              const uint8_t *src_ptr, ptrdiff_t src_stride, | ||||
|                              const uint8_t *pred_ptr, ptrdiff_t pred_stride); | ||||
| 
 | ||||
| namespace vp9 { | ||||
| namespace { | ||||
| 
 | ||||
| class VP9SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> { | ||||
|  public: | ||||
| @@ -105,5 +108,4 @@ INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest, | ||||
| INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest, | ||||
|                         ::testing::Values(vpx_subtract_block_msa)); | ||||
| #endif | ||||
| 
 | ||||
| }  // namespace vp9
 | ||||
| }  // namespace
 | ||||
| @@ -147,7 +147,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += subtract_test.cc | ||||
|  | ||||
| ifeq ($(CONFIG_VP9_ENCODER),yes) | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc | ||||
| @@ -172,6 +172,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht16x16_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_ANS)          += vp10_ans_test.cc | ||||
|  | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc | ||||
| LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc | ||||
|  | ||||
| ifeq ($(CONFIG_EXT_INTER),yes) | ||||
| LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc | ||||
|   | ||||
| @@ -44,9 +44,6 @@ typedef enum { | ||||
| #define IsInterpolatingFilter(filter)  (1) | ||||
| #endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS | ||||
|  | ||||
| #define MAXTXLEN 32 | ||||
| #define CU_SIZE  64 | ||||
|  | ||||
| static INLINE int is_inter_mode(PREDICTION_MODE mode) { | ||||
| #if CONFIG_EXT_INTER | ||||
|   return mode >= NEARESTMV && mode <= NEW_NEWMV; | ||||
| @@ -167,8 +164,8 @@ typedef struct { | ||||
|   PREDICTION_MODE mode; | ||||
|   TX_SIZE tx_size; | ||||
| #if CONFIG_VAR_TX | ||||
|   // TODO(jingning): This effectively assigned an entry for each 8x8 block. | ||||
|   // Apparently it takes much more space than needed. | ||||
|   // TODO(jingning): This effectively assigned a separate entry for each | ||||
|   // 8x8 block. Apparently it takes much more space than needed. | ||||
|   TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; | ||||
| #endif | ||||
|   int8_t skip; | ||||
| @@ -318,15 +315,15 @@ typedef struct macroblockd { | ||||
|   const YV12_BUFFER_CONFIG *cur_buf; | ||||
|  | ||||
|   ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; | ||||
|   ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; | ||||
|   ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; | ||||
|  | ||||
|   PARTITION_CONTEXT *above_seg_context; | ||||
|   PARTITION_CONTEXT left_seg_context[8]; | ||||
|   PARTITION_CONTEXT left_seg_context[MI_BLOCK_SIZE]; | ||||
|  | ||||
| #if CONFIG_VAR_TX | ||||
|   TXFM_CONTEXT *above_txfm_context; | ||||
|   TXFM_CONTEXT *left_txfm_context; | ||||
|   TXFM_CONTEXT left_txfm_context_buffer[8]; | ||||
|   TXFM_CONTEXT left_txfm_context_buffer[MI_BLOCK_SIZE]; | ||||
|  | ||||
|   TX_SIZE max_tx_size; | ||||
| #if CONFIG_SUPERTX | ||||
| @@ -686,6 +683,7 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, | ||||
|  | ||||
| #if CONFIG_EXT_INTER | ||||
| static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) { | ||||
|   // TODO(debargha): Should this be bsize < BLOCK_LARGEST? | ||||
|   return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -19,154 +19,282 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
| # define IF_EXT_PARTITION(...) __VA_ARGS__ | ||||
| #else | ||||
| # define IF_EXT_PARTITION(...) | ||||
| #endif | ||||
|  | ||||
| // Log 2 conversion lookup tables for block width and height | ||||
| static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = | ||||
|   {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; | ||||
|   {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)}; | ||||
| static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = | ||||
|   {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; | ||||
| static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16}; | ||||
| static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = | ||||
|   {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16}; | ||||
|   {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)}; | ||||
| // Log 2 conversion lookup tables for modeinfo width and height | ||||
| static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = | ||||
|   {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; | ||||
|   {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)}; | ||||
| static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = | ||||
|   {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3}; | ||||
|   {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)}; | ||||
|  | ||||
| // Width/height lookup tables in units of various block sizes | ||||
| static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)}; | ||||
| static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = | ||||
|   {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)}; | ||||
| static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; | ||||
|   {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)}; | ||||
| static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; | ||||
|   {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)}; | ||||
| static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)}; | ||||
| static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)}; | ||||
|  | ||||
| // VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) | ||||
| static const uint8_t size_group_lookup[BLOCK_SIZES] = | ||||
|   {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; | ||||
|   {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)}; | ||||
|  | ||||
| static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = | ||||
|   {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; | ||||
|   {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)}; | ||||
|  | ||||
| static const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { | ||||
|   {  // 4X4 | ||||
|     // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 | ||||
|     PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, | ||||
| static const PARTITION_TYPE | ||||
|   partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = { | ||||
|   {     // 4X4 -> | ||||
|     //                                    4X4 | ||||
|                                           PARTITION_NONE, | ||||
|     // 4X8,            8X4,               8X8 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     // 8X16,           16X8,              16X16 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     // 16X32,          32X16,             32X32 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     PARTITION_INVALID | ||||
|   }, {  // 8X8 | ||||
|     // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 | ||||
|     PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, | ||||
|     // 32X64,          64X32,             64X64 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID | ||||
|   }, {  // 16X16 | ||||
|     // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 | ||||
|     PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, | ||||
|     PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // 8X8 -> | ||||
|     //                                    4X4 | ||||
|                                           PARTITION_SPLIT, | ||||
|     // 4X8,            8X4,               8X8 | ||||
|     PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE, | ||||
|     // 8X16,           16X8,              16X16 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     PARTITION_INVALID, PARTITION_INVALID | ||||
|   }, {  // 32X32 | ||||
|     // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 | ||||
|     PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, | ||||
|     PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, | ||||
|     PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, | ||||
|     PARTITION_INVALID, PARTITION_INVALID | ||||
|   }, {  // 64X64 | ||||
|     // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 | ||||
|     PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, | ||||
|     PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, | ||||
|     PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, | ||||
|     PARTITION_NONE | ||||
|     // 16X32,          32X16,             32X32 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     // 32X64,          64X32,             64X64 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,         128x64,            128x128 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // 16X16 -> | ||||
|     //                                    4X4 | ||||
|                                           PARTITION_SPLIT, | ||||
|     // 4X8,            8X4,               8X8 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 8X16,           16X8,              16X16 | ||||
|     PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE, | ||||
|     // 16X32,          32X16,             32X32 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|     // 32X64,          64X32,             64X64 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,         128x64,            128x128 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // 32X32 -> | ||||
|     //                                    4X4 | ||||
|                                           PARTITION_SPLIT, | ||||
|     // 4X8,            8X4,               8X8 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 8X16,           16X8,              16X16 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 16X32,          32X16,             32X32 | ||||
|     PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE, | ||||
|     // 32X64,          64X32,             64X64 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,         128x64,            128x128 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // 64X64 -> | ||||
|     //                                    4X4 | ||||
|                                           PARTITION_SPLIT, | ||||
|     // 4X8,            8X4,               8X8 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 8X16,           16X8,              16X16 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 16X32,          32X16,             32X32 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 32X64,          64X32,             64X64 | ||||
|     PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,         128x64,            128x128 | ||||
|     PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, | ||||
|   }, {  // 128x128 -> | ||||
|     //                                    4X4 | ||||
|                                           PARTITION_SPLIT, | ||||
|     // 4X8,            8X4,               8X8 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 8X16,           16X8,              16X16 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 16X32,          32X16,             32X32 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 32X64,          64X32,             64X64 | ||||
|     PARTITION_SPLIT,   PARTITION_SPLIT,   PARTITION_SPLIT, | ||||
|     // 64x128,         128x64,            128x128 | ||||
|     PARTITION_VERT,    PARTITION_HORZ,    PARTITION_NONE, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION_TYPES | ||||
| static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = { | ||||
|   {     // PARTITION_NONE | ||||
|     BLOCK_4X4,   BLOCK_4X8,   BLOCK_8X4, | ||||
|     BLOCK_8X8,   BLOCK_8X16,  BLOCK_16X8, | ||||
|     BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, | ||||
|     BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, | ||||
|     BLOCK_64X64, | ||||
|   }, {  // PARTITION_HORZ | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X4,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X8,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X16,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_64X32, | ||||
|   }, {  // PARTITION_VERT | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_4X8,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X16,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X32,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X64, | ||||
|   }, {  // PARTITION_SPLIT | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_4X4,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X8,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X16,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X32, | ||||
|   }, {  // PARTITION_HORZ_A | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X4,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X8,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X16,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_64X32, | ||||
|   }, {  // PARTITION_HORZ_B | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X4,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X8,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X16,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_64X32, | ||||
|   }, {  // PARTITION_VERT_A | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_4X8,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X16,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X32,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X64, | ||||
|   }, {  // PARTITION_VERT_B | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_4X8,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X16,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X32,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X64, | ||||
|   } | ||||
| }; | ||||
| static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = | ||||
| #else | ||||
| static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { | ||||
| static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = | ||||
| #endif  // CONFIG_EXT_PARTITION_TYPES | ||||
| { | ||||
|   {     // PARTITION_NONE | ||||
|     BLOCK_4X4,   BLOCK_4X8,   BLOCK_8X4, | ||||
|     BLOCK_8X8,   BLOCK_8X16,  BLOCK_16X8, | ||||
|     BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, | ||||
|     BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, | ||||
|     BLOCK_64X64, | ||||
|     //                            4X4 | ||||
|                                   BLOCK_4X4, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_4X8,     BLOCK_8X4,     BLOCK_8X8, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_8X16,    BLOCK_16X8,    BLOCK_16X16, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_16X32,   BLOCK_32X16,   BLOCK_32X32, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_32X64,   BLOCK_64X32,   BLOCK_64X64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_64X128,  BLOCK_128X64,  BLOCK_128X128, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // PARTITION_HORZ | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X4,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X8,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X16,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_64X32, | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // PARTITION_VERT | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_4X8,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X16,    BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X32,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X64, | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // PARTITION_SPLIT | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_4X4,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_8X8,     BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_16X16,   BLOCK_INVALID, BLOCK_INVALID, | ||||
|     BLOCK_32X32, | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| #if CONFIG_EXT_PARTITION_TYPES | ||||
|   }, {  // PARTITION_HORZ_A | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // PARTITION_HORZ_B | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // PARTITION_VERT_A | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }, {  // PARTITION_VERT_B | ||||
|     //                            4X4 | ||||
|                                   BLOCK_INVALID, | ||||
|     // 4X8,        8X4,           8X8 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, | ||||
|     // 8X16,       16X8,          16X16 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, | ||||
|     // 16X32,      32X16,         32X32 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, | ||||
|     // 32X64,      64X32,         64X64 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // 64x128,     128x64,        128x128 | ||||
|     BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| #endif  // CONFIG_EXT_PARTITION_TYPES | ||||
|   } | ||||
| }; | ||||
| #endif  // CONFIG_EXT_PARTITION_TYPES | ||||
|  | ||||
| static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { | ||||
|   TX_4X4,   TX_4X4,   TX_4X4, | ||||
|   TX_8X8,   TX_8X8,   TX_8X8, | ||||
|   TX_16X16, TX_16X16, TX_16X16, | ||||
|   TX_32X32, TX_32X32, TX_32X32, TX_32X32 | ||||
|   //                   4X4 | ||||
|                        TX_4X4, | ||||
|   // 4X8,    8X4,      8X8 | ||||
|   TX_4X4,    TX_4X4,   TX_8X8, | ||||
|   // 8X16,   16X8,     16X16 | ||||
|   TX_8X8,    TX_8X8,   TX_16X16, | ||||
|   // 16X32,  32X16,    32X32 | ||||
|   TX_16X16,  TX_16X16, TX_32X32, | ||||
|   // 32X64,  64X32,    64X64 | ||||
|   TX_32X32,  TX_32X32, TX_32X32, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // 64x128, 128x64,   128x128 | ||||
|   TX_32X32,  TX_32X32, TX_32X32, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { | ||||
| @@ -200,6 +328,11 @@ static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { | ||||
|   {{BLOCK_32X64, BLOCK_32X32},   {BLOCK_INVALID, BLOCK_16X32}}, | ||||
|   {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32,   BLOCK_32X16}}, | ||||
|   {{BLOCK_64X64, BLOCK_64X32},   {BLOCK_32X64,   BLOCK_32X32}}, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   {{BLOCK_64X128, BLOCK_64X64},   {BLOCK_INVALID, BLOCK_32X64}}, | ||||
|   {{BLOCK_128X64, BLOCK_INVALID}, {BLOCK_64X64,   BLOCK_64X32}}, | ||||
|   {{BLOCK_128X128, BLOCK_128X64}, {BLOCK_64X128,  BLOCK_64X64}}, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| // Generates 4 bit field in which each bit set to 1 represents | ||||
| @@ -209,6 +342,24 @@ static const struct { | ||||
|   PARTITION_CONTEXT above; | ||||
|   PARTITION_CONTEXT left; | ||||
| } partition_context_lookup[BLOCK_SIZES]= { | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   {31, 31},  // 4X4   - {0b11111, 0b11111} | ||||
|   {31, 30},  // 4X8   - {0b11111, 0b11110} | ||||
|   {30, 31},  // 8X4   - {0b11110, 0b11111} | ||||
|   {30, 30},  // 8X8   - {0b11110, 0b11110} | ||||
|   {30, 28},  // 8X16  - {0b11110, 0b11100} | ||||
|   {28, 30},  // 16X8  - {0b11100, 0b11110} | ||||
|   {28, 28},  // 16X16 - {0b11100, 0b11100} | ||||
|   {28, 24},  // 16X32 - {0b11100, 0b11000} | ||||
|   {24, 28},  // 32X16 - {0b11000, 0b11100} | ||||
|   {24, 24},  // 32X32 - {0b11000, 0b11000} | ||||
|   {24, 16},  // 32X64 - {0b11000, 0b10000} | ||||
|   {16, 24},  // 64X32 - {0b10000, 0b11000} | ||||
|   {16, 16},  // 64X64 - {0b10000, 0b10000} | ||||
|   {16, 0 },  // 64X128- {0b10000, 0b00000} | ||||
|   {0,  16},  // 128X64- {0b00000, 0b10000} | ||||
|   {0,  0 },  // 128X128-{0b00000, 0b00000} | ||||
| #else | ||||
|   {15, 15},  // 4X4   - {0b1111, 0b1111} | ||||
|   {15, 14},  // 4X8   - {0b1111, 0b1110} | ||||
|   {14, 15},  // 8X4   - {0b1110, 0b1111} | ||||
| @@ -222,6 +373,7 @@ static const struct { | ||||
|   {8,  0 },  // 32X64 - {0b1000, 0b0000} | ||||
|   {0,  8 },  // 64X32 - {0b0000, 0b1000} | ||||
|   {0,  0 },  // 64X64 - {0b0000, 0b0000} | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| #if CONFIG_SUPERTX | ||||
|   | ||||
| @@ -171,6 +171,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] | ||||
|   {  72,  16,  44, 128, 128, 128, 128 },  // a split, l not split | ||||
|   {  58,  32,  12, 128, 128, 128, 128 },  // l split, a not split | ||||
|   {  10,   7,   6, 128, 128, 128, 128 },  // a/l both split | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // 128x128 -> 64x64 | ||||
|   { 222,  34,  30, 128, 128, 128, 128 },  // a/l both not split | ||||
|   {  72,  16,  44, 128, 128, 128, 128 },  // a split, l not split | ||||
|   {  58,  32,  12, 128, 128, 128, 128 },  // l split, a not split | ||||
|   {  10,   7,   6, 128, 128, 128, 128 },  // a/l both split | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
| #else | ||||
| static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] | ||||
| @@ -195,6 +202,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] | ||||
|   {  72,  16,  44 },  // a split, l not split | ||||
|   {  58,  32,  12 },  // l split, a not split | ||||
|   {  10,   7,   6 },  // a/l both split | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // 128x128 -> 64x64 | ||||
|   { 222,  34,  30 },  // a/l both not split | ||||
|   {  72,  16,  44 },  // a split, l not split | ||||
|   {  58,  32,  12 },  // l split, a not split | ||||
|   {  10,   7,   6 },  // a/l both split | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
| #endif  // CONFIG_EXT_PARTITION_TYPES | ||||
|  | ||||
| @@ -256,20 +270,33 @@ static const vpx_prob default_inter_compound_mode_probs | ||||
|  | ||||
| static const vpx_prob default_interintra_prob[BLOCK_SIZES] = { | ||||
|   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   192, 192, 192 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = { | ||||
|   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   192, 192, 192 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = { | ||||
|   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   192, 192, 192 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
| #endif  // CONFIG_EXT_INTER | ||||
|  | ||||
| #if CONFIG_OBMC | ||||
| static const vpx_prob default_obmc_prob[BLOCK_SIZES] = { | ||||
|     255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     // TODO(debargha) What are the correct values for these? | ||||
|     192, 192, 192 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
| #endif  // CONFIG_OBMC | ||||
|  | ||||
| @@ -389,6 +416,11 @@ vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { | ||||
|     { 180, 113, 136,  49,  45, 114}, | ||||
|     { 107,  70,  87,  49, 154, 156}, | ||||
|     {  98, 105, 142,  63,  64, 152}, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     {  98, 105, 142,  63,  64, 152}, | ||||
|     {  98, 105, 142,  63,  64, 152}, | ||||
|     {  98, 105, 142,  63,  64, 152}, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| const vpx_prob | ||||
| @@ -403,6 +435,11 @@ vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { | ||||
|     {  67,  53,  54,  55,  66,  93}, | ||||
|     { 120, 130,  83, 171,  75, 214}, | ||||
|     {  72,  55,  66,  68,  79, 107}, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     {  72,  55,  66,  68,  79, 107}, | ||||
|     {  72,  55,  66,  68,  79, 107}, | ||||
|     {  72,  55,  66,  68,  79, 107}, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| const vpx_prob | ||||
| @@ -418,6 +455,11 @@ vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] | ||||
|     { 240,  180,  100, }, | ||||
|     { 240,  180,  100, }, | ||||
|     { 240,  180,  100, }, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     { 240,  180,  100, }, | ||||
|     { 240,  180,  100, }, | ||||
|     { 240,  180,  100, }, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -32,7 +32,7 @@ extern "C" { | ||||
|  | ||||
| #define PALETTE_COLOR_CONTEXTS 16 | ||||
| #define PALETTE_MAX_SIZE 8 | ||||
| #define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1) | ||||
| #define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1) | ||||
| #define PALETTE_Y_MODE_CONTEXTS 3 | ||||
|  | ||||
| struct VP10Common; | ||||
|   | ||||
| @@ -18,13 +18,25 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define MI_SIZE_LOG2 3 | ||||
| #define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2)  // 64 = 2^6 | ||||
| #undef MAX_SB_SIZE | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
| # define MAX_SB_SIZE_LOG2 7 | ||||
| #else | ||||
| # define MAX_SB_SIZE_LOG2 6 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
| #define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) | ||||
| #define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) | ||||
|  | ||||
| #define MI_SIZE_LOG2 3 | ||||
| #define MI_SIZE (1 << MI_SIZE_LOG2)  // pixels per mi-unit | ||||
|  | ||||
| #define MI_BLOCK_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) | ||||
| #define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2)  // mi-units per max block | ||||
|  | ||||
| #define MI_MASK (MI_BLOCK_SIZE - 1) | ||||
| #define MI_MASK_2 (MI_BLOCK_SIZE * 2 - 1) | ||||
|  | ||||
| #if CONFIG_EXT_TILE | ||||
| # define  MAX_TILE_ROWS 1024 | ||||
| @@ -49,32 +61,29 @@ typedef enum BITSTREAM_PROFILE { | ||||
|   MAX_PROFILES | ||||
| } BITSTREAM_PROFILE; | ||||
|  | ||||
| #define BLOCK_4X4      0 | ||||
| #define BLOCK_4X8      1 | ||||
| #define BLOCK_8X4      2 | ||||
| #define BLOCK_8X8      3 | ||||
| #define BLOCK_8X16     4 | ||||
| #define BLOCK_16X8     5 | ||||
| #define BLOCK_16X16    6 | ||||
| #define BLOCK_16X32    7 | ||||
| #define BLOCK_32X16    8 | ||||
| #define BLOCK_32X32    9 | ||||
| #define BLOCK_32X64   10 | ||||
| #define BLOCK_64X32   11 | ||||
| #define BLOCK_64X64   12 | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
| #define BLOCK_64X128  13 | ||||
| #define BLOCK_128X64  14 | ||||
| #define BLOCK_128X128 15 | ||||
| #define BLOCK_SIZES   16 | ||||
| #define BLOCK_4X4       0 | ||||
| #define BLOCK_4X8       1 | ||||
| #define BLOCK_8X4       2 | ||||
| #define BLOCK_8X8       3 | ||||
| #define BLOCK_8X16      4 | ||||
| #define BLOCK_16X8      5 | ||||
| #define BLOCK_16X16     6 | ||||
| #define BLOCK_16X32     7 | ||||
| #define BLOCK_32X16     8 | ||||
| #define BLOCK_32X32     9 | ||||
| #define BLOCK_32X64    10 | ||||
| #define BLOCK_64X32    11 | ||||
| #define BLOCK_64X64    12 | ||||
| #if !CONFIG_EXT_PARTITION | ||||
| # define BLOCK_SIZES   13 | ||||
| #else | ||||
| #define BLOCK_SIZES   13 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
| #define BLOCK_INVALID (BLOCK_SIZES) | ||||
| # define BLOCK_64X128  13 | ||||
| # define BLOCK_128X64  14 | ||||
| # define BLOCK_128X128 15 | ||||
| # define BLOCK_SIZES   16 | ||||
| #endif  // !CONFIG_EXT_PARTITION | ||||
| #define BLOCK_INVALID BLOCK_SIZES | ||||
| #define BLOCK_LARGEST (BLOCK_SIZES - 1) | ||||
|  | ||||
| typedef uint8_t BLOCK_SIZE; | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION_TYPES | ||||
| @@ -104,7 +113,11 @@ typedef enum PARTITION_TYPE { | ||||
|  | ||||
| typedef char PARTITION_CONTEXT; | ||||
| #define PARTITION_PLOFFSET   4  // number of probability models per block size | ||||
| #define PARTITION_CONTEXTS   (4 * PARTITION_PLOFFSET) | ||||
| #if CONFIG_EXT_PARTITION | ||||
| # define PARTITION_CONTEXTS  (5 * PARTITION_PLOFFSET) | ||||
| #else | ||||
| # define PARTITION_CONTEXTS  (4 * PARTITION_PLOFFSET) | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
| // block transform size | ||||
| typedef uint8_t TX_SIZE; | ||||
| @@ -114,6 +127,15 @@ typedef uint8_t TX_SIZE; | ||||
| #define TX_32X32 ((TX_SIZE)3)   // 32x32 transform | ||||
| #define TX_SIZES ((TX_SIZE)4) | ||||
|  | ||||
| #define MAX_TX_SIZE_LOG2  5 | ||||
| #define MAX_TX_SIZE       (1 << MAX_TX_SIZE_LOG2) | ||||
| #define MAX_TX_SQUARE     (MAX_TX_SIZE * MAX_TX_SIZE) | ||||
|  | ||||
| // Number of maximum size transform blocks in the maximum size superblock | ||||
| #define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 \ | ||||
|   ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2) | ||||
| #define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2) | ||||
|  | ||||
| // frame transform mode | ||||
| typedef enum { | ||||
|   ONLY_4X4            = 0,        // only 4x4 transform used | ||||
| @@ -286,10 +308,15 @@ typedef enum { | ||||
|  | ||||
| /* Segment Feature Masks */ | ||||
| #define MAX_MV_REF_CANDIDATES 2 | ||||
|  | ||||
| #if CONFIG_REF_MV | ||||
| #define MAX_REF_MV_STACK_SIZE 16 | ||||
| #define REF_CAT_LEVEL  160 | ||||
| #endif | ||||
| #if CONFIG_EXT_PARTITION | ||||
| #define REF_CAT_LEVEL 640 | ||||
| #else | ||||
| #define REF_CAT_LEVEL 160 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| #endif  // CONFIG_REF_MV | ||||
|  | ||||
| #define INTRA_INTER_CONTEXTS 4 | ||||
| #define COMP_INTER_CONTEXTS 5 | ||||
|   | ||||
| @@ -871,6 +871,9 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, | ||||
|                         cm->mi_rows - mi_row : MI_BLOCK_SIZE); | ||||
|   const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? | ||||
|                         cm->mi_cols - mi_col : MI_BLOCK_SIZE); | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   assert(0 && "Not yet updated"); | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   vp10_zero(*lfm); | ||||
|   assert(mip[0] != NULL); | ||||
| @@ -1045,8 +1048,10 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, | ||||
|     const uint64_t rows = cm->mi_rows - mi_row; | ||||
|  | ||||
|     // Each pixel inside the border gets a 1. | ||||
|     const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); | ||||
|     const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); | ||||
|     const uint64_t mask_y = | ||||
|       (((uint64_t) 1 << (rows << MI_BLOCK_SIZE_LOG2)) - 1); | ||||
|     const uint16_t mask_uv = | ||||
|       (((uint16_t) 1 << (((rows + 1) >> 1) << (MI_BLOCK_SIZE_LOG2 - 1))) - 1); | ||||
|  | ||||
|     // Remove values completely outside our border. | ||||
|     for (i = 0; i < TX_32X32; i++) { | ||||
| @@ -1262,7 +1267,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, | ||||
|  | ||||
|       int tx_size_mask = 0; | ||||
|       // Filter level can vary per MI | ||||
|       if (!(lfl[(r << 3) + (c >> ss_x)] = | ||||
|       if (!(lfl[(r << MI_BLOCK_SIZE_LOG2) + (c >> ss_x)] = | ||||
|             get_filter_level(&cm->lf_info, mbmi))) | ||||
|         continue; | ||||
|  | ||||
| @@ -1280,11 +1285,13 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, | ||||
|                                 sb_type, ss_x, ss_y) : | ||||
|             mbmi->inter_tx_size[blk_row][blk_col]; | ||||
|  | ||||
|       tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]); | ||||
|       tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]); | ||||
|       tx_size_r = VPXMIN(tx_size, | ||||
|                          cm->above_txfm_context[mi_col + c]); | ||||
|       tx_size_c = VPXMIN(tx_size, | ||||
|                          cm->left_txfm_context[(mi_row + r) & MI_MASK]); | ||||
|  | ||||
|       cm->above_txfm_context[mi_col + c] = tx_size; | ||||
|       cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size; | ||||
|       cm->left_txfm_context[(mi_row + r) & MI_MASK] = tx_size; | ||||
| #endif | ||||
|  | ||||
|       // Build masks based on the transform size of each block | ||||
| @@ -1351,21 +1358,22 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, | ||||
|     border_mask = ~(mi_col == 0); | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|     if (cm->use_highbitdepth) { | ||||
|       highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), | ||||
|                                      dst->stride, | ||||
|                                      mask_16x16_c & border_mask, | ||||
|                                      mask_8x8_c & border_mask, | ||||
|                                      mask_4x4_c & border_mask, | ||||
|                                      mask_4x4_int[r], | ||||
|                                      &cm->lf_info, &lfl[r << 3], | ||||
|                                      (int)cm->bit_depth); | ||||
|       highbd_filter_selectively_vert( | ||||
|           CONVERT_TO_SHORTPTR(dst->buf), | ||||
|           dst->stride, | ||||
|           mask_16x16_c & border_mask, | ||||
|           mask_8x8_c & border_mask, | ||||
|           mask_4x4_c & border_mask, | ||||
|           mask_4x4_int[r], | ||||
|           &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2], | ||||
|           (int)cm->bit_depth); | ||||
|     } else { | ||||
|       filter_selectively_vert(dst->buf, dst->stride, | ||||
|                               mask_16x16_c & border_mask, | ||||
|                               mask_8x8_c & border_mask, | ||||
|                               mask_4x4_c & border_mask, | ||||
|                               mask_4x4_int[r], | ||||
|                               &cm->lf_info, &lfl[r << 3]); | ||||
|                               &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); | ||||
|     } | ||||
| #else | ||||
|     filter_selectively_vert(dst->buf, dst->stride, | ||||
| @@ -1373,7 +1381,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, | ||||
|                             mask_8x8_c & border_mask, | ||||
|                             mask_4x4_c & border_mask, | ||||
|                             mask_4x4_int[r], | ||||
|                             &cm->lf_info, &lfl[r << 3]); | ||||
|                             &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     dst->buf += 8 * dst->stride; | ||||
|     mi_8x8 += row_step_stride; | ||||
| @@ -1400,21 +1408,22 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, | ||||
|     } | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|     if (cm->use_highbitdepth) { | ||||
|       highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), | ||||
|                                       dst->stride, | ||||
|                                       mask_16x16_r, | ||||
|                                       mask_8x8_r, | ||||
|                                       mask_4x4_r, | ||||
|                                       mask_4x4_int_r, | ||||
|                                       &cm->lf_info, &lfl[r << 3], | ||||
|                                       (int)cm->bit_depth); | ||||
|       highbd_filter_selectively_horiz( | ||||
|           CONVERT_TO_SHORTPTR(dst->buf), | ||||
|           dst->stride, | ||||
|           mask_16x16_r, | ||||
|           mask_8x8_r, | ||||
|           mask_4x4_r, | ||||
|           mask_4x4_int_r, | ||||
|           &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2], | ||||
|           (int)cm->bit_depth); | ||||
|     } else { | ||||
|       filter_selectively_horiz(dst->buf, dst->stride, | ||||
|                                mask_16x16_r, | ||||
|                                mask_8x8_r, | ||||
|                                mask_4x4_r, | ||||
|                                mask_4x4_int_r, | ||||
|                                &cm->lf_info, &lfl[r << 3]); | ||||
|                                &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); | ||||
|     } | ||||
| #else | ||||
|     filter_selectively_horiz(dst->buf, dst->stride, | ||||
| @@ -1422,7 +1431,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, | ||||
|                              mask_8x8_r, | ||||
|                              mask_4x4_r, | ||||
|                              mask_4x4_int_r, | ||||
|                              &cm->lf_info, &lfl[r << 3]); | ||||
|                              &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     dst->buf += 8 * dst->stride; | ||||
|   } | ||||
| @@ -1455,16 +1464,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, | ||||
|       highbd_filter_selectively_vert_row2( | ||||
|           plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, | ||||
|           mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, | ||||
|           &lfm->lfl_y[r << 3], (int)cm->bit_depth); | ||||
|           &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], (int)cm->bit_depth); | ||||
|     } else { | ||||
|       filter_selectively_vert_row2( | ||||
|           plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, | ||||
|           mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); | ||||
|           mask_4x4_l, mask_4x4_int_l, &cm->lf_info, | ||||
|           &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); | ||||
|     } | ||||
| #else | ||||
|     filter_selectively_vert_row2( | ||||
|         plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, | ||||
|         mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); | ||||
|         mask_4x4_l, mask_4x4_int_l, &cm->lf_info, | ||||
|         &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     dst->buf += 16 * dst->stride; | ||||
|     mask_16x16 >>= 16; | ||||
| @@ -1499,17 +1510,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, | ||||
|     if (cm->use_highbitdepth) { | ||||
|       highbd_filter_selectively_horiz( | ||||
|           CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, | ||||
|           mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], | ||||
|           mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, | ||||
|           &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], | ||||
|           (int)cm->bit_depth); | ||||
|     } else { | ||||
|       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, | ||||
|                                mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, | ||||
|                                &lfm->lfl_y[r << 3]); | ||||
|                                &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); | ||||
|     } | ||||
| #else | ||||
|     filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, | ||||
|                              mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, | ||||
|                              &lfm->lfl_y[r << 3]); | ||||
|                              &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
|     dst->buf += 8 * dst->stride; | ||||
| @@ -1539,8 +1551,10 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, | ||||
|   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { | ||||
|     if (plane->plane_type == 1) { | ||||
|       for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { | ||||
|         lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; | ||||
|         lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; | ||||
|         lfm->lfl_uv[(r << 1) + c] = | ||||
|           lfm->lfl_y[(r << MI_BLOCK_SIZE_LOG2) + (c << 1)]; | ||||
|         lfm->lfl_uv[((r + 2) << 1) + c] = | ||||
|           lfm->lfl_y[((r + 2) << MI_BLOCK_SIZE_LOG2) + (c << 1)]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
| @@ -1632,9 +1646,31 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, | ||||
|                            VP10_COMMON *cm, | ||||
|                            struct macroblockd_plane planes[MAX_MB_PLANE], | ||||
|                            int start, int stop, int y_only) { | ||||
| #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES | ||||
|   const int num_planes = y_only ? 1 : MAX_MB_PLANE; | ||||
|   int mi_row, mi_col; | ||||
|  | ||||
| # if CONFIG_VAR_TX | ||||
|   memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); | ||||
| # endif  // CONFIG_VAR_TX | ||||
|   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { | ||||
|     MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; | ||||
| # if CONFIG_VAR_TX | ||||
|     memset(cm->left_txfm_context, TX_SIZES, MI_BLOCK_SIZE); | ||||
| # endif  // CONFIG_VAR_TX | ||||
|     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { | ||||
|       int plane; | ||||
|  | ||||
|       vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); | ||||
|  | ||||
|       for (plane = 0; plane < num_planes; ++plane) | ||||
|         vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, | ||||
|                                        mi_row, mi_col); | ||||
|     } | ||||
|   } | ||||
| #else | ||||
|   const int num_planes = y_only ? 1 : MAX_MB_PLANE; | ||||
|   int mi_row, mi_col; | ||||
| #if !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES | ||||
|   enum lf_path path; | ||||
|   LOOP_FILTER_MASK lfm; | ||||
|  | ||||
| @@ -1646,29 +1682,17 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, | ||||
|     path = LF_PATH_444; | ||||
|   else | ||||
|     path = LF_PATH_SLOW; | ||||
| #endif  // !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES | ||||
|  | ||||
| #if CONFIG_VAR_TX | ||||
|   memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); | ||||
| #endif | ||||
|   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { | ||||
|     MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; | ||||
| #if CONFIG_VAR_TX | ||||
|     memset(cm->left_txfm_context, TX_SIZES, 8); | ||||
| #endif | ||||
|     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { | ||||
|       int plane; | ||||
|  | ||||
|       vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); | ||||
|  | ||||
| #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES | ||||
|       for (plane = 0; plane < num_planes; ++plane) | ||||
|         vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, | ||||
|                                        mi_row, mi_col); | ||||
| #else | ||||
|       // TODO(JBB): Make setup_mask work for non 420. | ||||
|       vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, | ||||
|                      &lfm); | ||||
|       vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); | ||||
|  | ||||
|       vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); | ||||
|       for (plane = 1; plane < num_planes; ++plane) { | ||||
|         switch (path) { | ||||
| @@ -1684,9 +1708,9 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, | ||||
|             break; | ||||
|         } | ||||
|       } | ||||
| #endif  // CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES | ||||
|     } | ||||
|   } | ||||
| #endif  // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES | ||||
| } | ||||
|  | ||||
| void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, | ||||
|   | ||||
| @@ -84,8 +84,8 @@ typedef struct { | ||||
|   uint16_t above_uv[TX_SIZES]; | ||||
|   uint16_t left_int_4x4_uv; | ||||
|   uint16_t above_int_4x4_uv; | ||||
|   uint8_t lfl_y[64]; | ||||
|   uint8_t lfl_uv[16]; | ||||
|   uint8_t lfl_y[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; | ||||
|   uint8_t lfl_uv[MI_BLOCK_SIZE / 2 * MI_BLOCK_SIZE / 2]; | ||||
| } LOOP_FILTER_MASK; | ||||
|  | ||||
| /* assorted loopfilter functions which get used elsewhere */ | ||||
|   | ||||
| @@ -12,6 +12,7 @@ | ||||
| #include "vp10/common/mvref_common.h" | ||||
|  | ||||
| #if CONFIG_REF_MV | ||||
|  | ||||
| static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, | ||||
|                                     const MB_MODE_INFO *const candidate, | ||||
|                                     const MV_REFERENCE_FRAME rf[2], | ||||
| @@ -23,6 +24,8 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, | ||||
|   int index = 0, ref; | ||||
|   int newmv_count = 0; | ||||
|  | ||||
|   assert(2 * weight < REF_CAT_LEVEL); | ||||
|  | ||||
|   if (rf[1] == NONE) { | ||||
|     // single reference frame | ||||
|     for (ref = 0; ref < 2; ++ref) { | ||||
| @@ -246,32 +249,30 @@ static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, | ||||
|   return newmv_count; | ||||
| } | ||||
|  | ||||
| // This function assumes MI blocks are 8x8 and coding units are 64x64 | ||||
| static int has_top_right(const MACROBLOCKD *xd, | ||||
|                          int mi_row, int mi_col, int bs) { | ||||
|   // In a split partition all apart from the bottom right has a top right | ||||
|   int has_tr = !((mi_row & bs) & (bs * 2 - 1)) || | ||||
|                !((mi_col & bs) & (bs * 2 - 1)); | ||||
|   int has_tr = !((mi_row & bs) && (mi_col & bs)); | ||||
|  | ||||
|   // bs > 0 and bs is a power of 2 | ||||
|   assert(bs > 0 && !(bs & (bs - 1))); | ||||
|  | ||||
|   // Filter out partial right-most boundaries | ||||
|   // For each 4x4 group of blocks, when the bottom right is decoded the blocks | ||||
|   // to the right have not been decoded therefore the second from bottom in the | ||||
|   // right-most column does not have a top right | ||||
|   if ((mi_col & bs) & (bs * 2 - 1)) { | ||||
|     if (((mi_col & (2 * bs)) & (bs * 4 - 1)) && | ||||
|         ((mi_row & (2 * bs)) & (bs * 4 - 1))) | ||||
|       has_tr = 0; | ||||
|   // to the right have not been decoded therefore the bottom right does | ||||
|   // not have a top right | ||||
|   while (bs < MI_BLOCK_SIZE) { | ||||
|     if (mi_col & bs) { | ||||
|       if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) { | ||||
|         has_tr = 0; | ||||
|         break; | ||||
|       } | ||||
|     } else { | ||||
|       break; | ||||
|     } | ||||
|     bs <<= 1; | ||||
|   } | ||||
|  | ||||
|   // If the right had side of the block lines up with the right had edge end of | ||||
|   // a group of 8x8 MI blocks (i.e. edge of a coding unit) and is not on the top | ||||
|   // row of that coding unit, it does not have a top right | ||||
|   if (has_tr) | ||||
|     if (((mi_col + xd->n8_w) & 0x07) == 0) | ||||
|       if ((mi_row & 0x07) > 0) | ||||
|         has_tr = 0; | ||||
|  | ||||
|   // The left had of two vertical rectangles always has a top right (as the | ||||
|   // The left hand of two vertical rectangles always has a top right (as the | ||||
|   // block above will have been decoded) | ||||
|   if (xd->n8_w < xd->n8_h) | ||||
|     if (!xd->is_sec_rect) | ||||
| @@ -359,8 +360,11 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, | ||||
|  | ||||
|   nearest_refmv_count = *refmv_count; | ||||
|  | ||||
|   for (idx = 0; idx < nearest_refmv_count; ++idx) | ||||
|   for (idx = 0; idx < nearest_refmv_count; ++idx) { | ||||
|     assert(ref_mv_stack[idx].weight > 0 && | ||||
|            ref_mv_stack[idx].weight < REF_CAT_LEVEL); | ||||
|     ref_mv_stack[idx].weight += REF_CAT_LEVEL; | ||||
|   } | ||||
|  | ||||
|   if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame | ||||
|       && rf[1] == NONE) { | ||||
|   | ||||
| @@ -120,7 +120,16 @@ static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { | ||||
|   // 64X32 | ||||
|   {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, | ||||
|   // 64X64 | ||||
|   {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} | ||||
|   {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // TODO(debargha/jingning) Making them twice the 32x64, .. ones above | ||||
|   // 64x128 | ||||
|   {{0, -2}, {-2, 0}, {8, -2}, {-2, 4}, {-2, -2}, {0, -6}, {-6, 0}, {4, -2}}, | ||||
|   // 128x64 | ||||
|   {{-2, 0}, {0, -2}, {-2, 8}, {4, -2}, {-2, -2}, {-6, 0}, {0, -6}, {-2, 4}}, | ||||
|   // 128x128 | ||||
|   {{-2, 6}, {6, -2}, {-2, 8}, {8, -2}, {-2, -2}, {-2, 0}, {0, -2}, {-2, 12}}, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const int idx_n_column_to_subblock[4][2] = { | ||||
| @@ -131,7 +140,11 @@ static const int idx_n_column_to_subblock[4][2] = { | ||||
| }; | ||||
|  | ||||
| // clamp_mv_ref | ||||
| #define MV_BORDER (8 << 3)  // Allow 8 pels in 1/8th pel units | ||||
| #if CONFIG_EXT_PARTITION | ||||
| # define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units | ||||
| #else | ||||
| # define MV_BORDER (8 << 3)   // Allow 8 pels in 1/8th pel units | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
| static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { | ||||
|   clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER, | ||||
|   | ||||
| @@ -332,7 +332,7 @@ typedef struct VP10Common { | ||||
|   ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; | ||||
| #if CONFIG_VAR_TX | ||||
|   TXFM_CONTEXT *above_txfm_context; | ||||
|   TXFM_CONTEXT left_txfm_context[8]; | ||||
|   TXFM_CONTEXT left_txfm_context[MI_BLOCK_SIZE]; | ||||
| #endif | ||||
|   int above_context_alloc_cols; | ||||
|  | ||||
| @@ -440,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd, | ||||
|  | ||||
| static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { | ||||
|   const int above_idx = mi_col * 2; | ||||
|   const int left_idx = (mi_row * 2) & 15;  // FIXME: Mask should be CU_SIZE*2-1 | ||||
|   const int left_idx = (mi_row * 2) & MI_MASK_2; | ||||
|   int i; | ||||
|   for (i = 0; i < MAX_MB_PLANE; ++i) { | ||||
|     struct macroblockd_plane *const pd = &xd->plane[i]; | ||||
|   | ||||
| @@ -454,52 +454,52 @@ void vp10_make_masked_inter_predictor( | ||||
|     const MACROBLOCKD *xd) { | ||||
|   const MODE_INFO *mi = xd->mi[0]; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; | ||||
|   uint8_t tmp_dst_[2 * MAX_SB_SQUARE]; | ||||
|   uint8_t *tmp_dst = | ||||
|       (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? | ||||
|       CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; | ||||
|   vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, | ||||
|   vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, | ||||
|                             subpel_x, subpel_y, sf, w, h, 0, | ||||
|                             interp_filter, xs, ys, xd); | ||||
| #if CONFIG_SUPERTX | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) | ||||
|     build_masked_compound_extend_highbd( | ||||
|         dst, dst_stride, tmp_dst, CU_SIZE, plane, | ||||
|         dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane, | ||||
|         mi->mbmi.interinter_wedge_index, | ||||
|         mi->mbmi.sb_type, | ||||
|         wedge_offset_y, wedge_offset_x, h, w); | ||||
|   else | ||||
|     build_masked_compound_extend( | ||||
|         dst, dst_stride, tmp_dst, CU_SIZE, plane, | ||||
|         dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane, | ||||
|         mi->mbmi.interinter_wedge_index, | ||||
|         mi->mbmi.sb_type, | ||||
|         wedge_offset_y, wedge_offset_x, h, w); | ||||
| #else | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) | ||||
|     build_masked_compound_highbd( | ||||
|         dst, dst_stride, tmp_dst, CU_SIZE, | ||||
|         dst, dst_stride, tmp_dst, MAX_SB_SIZE, | ||||
|         mi->mbmi.interinter_wedge_index, | ||||
|         mi->mbmi.sb_type, h, w); | ||||
|   else | ||||
|     build_masked_compound( | ||||
|         dst, dst_stride, tmp_dst, CU_SIZE, | ||||
|         dst, dst_stride, tmp_dst, MAX_SB_SIZE, | ||||
|         mi->mbmi.interinter_wedge_index, | ||||
|         mi->mbmi.sb_type, h, w); | ||||
| #endif  // CONFIG_SUPERTX | ||||
| #else   // CONFIG_VP9_HIGHBITDEPTH | ||||
|   uint8_t tmp_dst[CU_SIZE * CU_SIZE]; | ||||
|   vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, | ||||
|   uint8_t tmp_dst[MAX_SB_SQUARE]; | ||||
|   vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, | ||||
|                             subpel_x, subpel_y, sf, w, h, 0, | ||||
|                             interp_filter, xs, ys, xd); | ||||
| #if CONFIG_SUPERTX | ||||
|   build_masked_compound_extend( | ||||
|       dst, dst_stride, tmp_dst, CU_SIZE, plane, | ||||
|       dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane, | ||||
|       mi->mbmi.interinter_wedge_index, | ||||
|       mi->mbmi.sb_type, | ||||
|       wedge_offset_y, wedge_offset_x, h, w); | ||||
| #else | ||||
|   build_masked_compound( | ||||
|       dst, dst_stride, tmp_dst, CU_SIZE, | ||||
|       dst, dst_stride, tmp_dst, MAX_SB_SIZE, | ||||
|       mi->mbmi.interinter_wedge_index, | ||||
|       mi->mbmi.sb_type, h, w); | ||||
| #endif  // CONFIG_SUPERTX | ||||
| @@ -877,12 +877,13 @@ void vp10_build_masked_inter_predictor_complex( | ||||
|     int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, | ||||
|     PARTITION_TYPE partition, int plane) { | ||||
|   int i, j; | ||||
|   uint8_t mask[MAXTXLEN]; | ||||
|   int top_w = 4 << b_width_log2_lookup[top_bsize], | ||||
|       top_h = 4 << b_height_log2_lookup[top_bsize]; | ||||
|   int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize]; | ||||
|   int w_offset = (mi_col - mi_col_ori) << 3, | ||||
|       h_offset = (mi_row - mi_row_ori) << 3; | ||||
|   uint8_t mask[MAX_TX_SIZE]; | ||||
|   int top_w = 4 << b_width_log2_lookup[top_bsize]; | ||||
|   int top_h = 4 << b_height_log2_lookup[top_bsize]; | ||||
|   int w = 4 << b_width_log2_lookup[bsize]; | ||||
|   int h = 4 << b_height_log2_lookup[bsize]; | ||||
|   int w_offset = (mi_col - mi_col_ori) * MI_SIZE; | ||||
|   int h_offset = (mi_row - mi_row_ori) * MI_SIZE; | ||||
|  | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   uint16_t *dst16= CONVERT_TO_SHORTPTR(dst); | ||||
| @@ -890,6 +891,8 @@ void vp10_build_masked_inter_predictor_complex( | ||||
|   int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
|   assert(bsize <= BLOCK_32X32); | ||||
|  | ||||
|   top_w >>= pd->subsampling_x; | ||||
|   top_h >>= pd->subsampling_y; | ||||
|   w >>= pd->subsampling_x; | ||||
| @@ -916,7 +919,8 @@ void vp10_build_masked_inter_predictor_complex( | ||||
|             if (m == 0) | ||||
|               dst_tmp[j] = dst2_tmp[j]; | ||||
|             else | ||||
|               dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; | ||||
|               dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + | ||||
|                                               dst2_tmp[j] * (64 - m), 6); | ||||
|           } | ||||
|           dst_tmp += dst_stride; | ||||
|           dst2_tmp += dst2_stride; | ||||
| @@ -943,7 +947,8 @@ void vp10_build_masked_inter_predictor_complex( | ||||
|             if (m == 0) | ||||
|               dst_tmp[j] = dst2_tmp[j]; | ||||
|             else | ||||
|               dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; | ||||
|               dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + | ||||
|                                               dst2_tmp[j] * (64 - m), 6); | ||||
|           } | ||||
|           dst_tmp += dst_stride; | ||||
|           dst2_tmp += dst2_stride; | ||||
| @@ -978,7 +983,8 @@ void vp10_build_masked_inter_predictor_complex( | ||||
|             if (m == 0) | ||||
|               dst_tmp[j] = dst2_tmp[j]; | ||||
|             else | ||||
|               dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; | ||||
|               dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + | ||||
|                                               dst2_tmp[j] * (64 - m), 6); | ||||
|           } | ||||
|           memcpy(dst_tmp + j, dst2_tmp + j, | ||||
|                      (top_w - w_offset - w) * sizeof(uint16_t)); | ||||
| @@ -1001,7 +1007,8 @@ void vp10_build_masked_inter_predictor_complex( | ||||
|             if (m == 0) | ||||
|               dst_tmp[j] = dst2_tmp[j]; | ||||
|             else | ||||
|               dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; | ||||
|               dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m + | ||||
|                                               dst2_tmp[j] * (64 - m), 6); | ||||
|           } | ||||
|             memcpy(dst_tmp + j, dst2_tmp + j, | ||||
|                        (top_w - w_offset - w) * sizeof(uint8_t)); | ||||
| @@ -1158,12 +1165,39 @@ static const uint8_t obmc_mask_16[2][16] = { | ||||
| }; | ||||
|  | ||||
| static const uint8_t obmc_mask_32[2][32] = { | ||||
|     { 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, | ||||
|       56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64}, | ||||
|     { 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11,  9, | ||||
|        8,  7,  6,  5,  4,  4,  3,  2,  2,  1,  1,  0,  0,  0,  0,  0} | ||||
|     { 33, 35, 36, 38, 40, 41, 43, 44, | ||||
|       45, 47, 48, 50, 51, 52, 53, 55, | ||||
|       56, 57, 58, 59, 60, 60, 61, 62, | ||||
|       62, 63, 63, 64, 64, 64, 64, 64 }, | ||||
|     { 31, 29, 28, 26, 24, 23, 21, 20, | ||||
|       19, 17, 16, 14, 13, 12, 11,  9, | ||||
|        8,  7,  6,  5,  4,  4,  3,  2, | ||||
|        2,  1,  1,  0,  0,  0,  0,  0 } | ||||
| }; | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
| // TODO(debargha): What are the correct values here? | ||||
| static const uint8_t obmc_mask_64[2][64] = { | ||||
|     { 33, 33, 35, 35, 36, 36, 38, 38, | ||||
|       40, 40, 41, 41, 43, 43, 44, 44, | ||||
|       45, 45, 47, 47, 48, 48, 50, 50, | ||||
|       51, 51, 52, 52, 53, 53, 55, 55, | ||||
|       56, 56, 57, 57, 58, 58, 59, 59, | ||||
|       60, 60, 60, 60, 61, 61, 62, 62, | ||||
|       62, 62, 63, 63, 63, 63, 64, 64, | ||||
|       64, 64, 64, 64, 64, 64, 64, 64 }, | ||||
|     { 31, 31, 29, 29, 28, 28, 26, 26, | ||||
|       24, 24, 23, 23, 21, 21, 20, 20, | ||||
|       19, 19, 17, 17, 16, 16, 14, 14, | ||||
|       13, 13, 12, 12, 11, 11,  9,  9, | ||||
|        8,  8,  7,  7,  6,  6,  5,  5, | ||||
|        4,  4,  4,  4,  3,  3,  2,  2, | ||||
|        2,  2,  1,  1,  1,  1,  0,  0, | ||||
|        0,  0,  0,  0,  0,  0,  0,  0 } | ||||
| }; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|  | ||||
| void setup_obmc_mask(int length, const uint8_t *mask[2]) { | ||||
|   switch (length) { | ||||
|     case 1: | ||||
| @@ -1190,9 +1224,15 @@ void setup_obmc_mask(int length, const uint8_t *mask[2]) { | ||||
|       mask[0] = obmc_mask_32[0]; | ||||
|       mask[1] = obmc_mask_32[1]; | ||||
|       break; | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     case 64: | ||||
|       mask[0] = obmc_mask_64[0]; | ||||
|       mask[1] = obmc_mask_64[1]; | ||||
|       break; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|     default: | ||||
|       mask[0] = obmc_mask_32[0]; | ||||
|       mask[1] = obmc_mask_32[1]; | ||||
|       mask[0] = NULL; | ||||
|       mask[1] = NULL; | ||||
|       assert(0); | ||||
|       break; | ||||
|   } | ||||
| @@ -1265,15 +1305,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, | ||||
|  | ||||
|     for (plane = 0; plane < MAX_MB_PLANE; ++plane) { | ||||
|       const struct macroblockd_plane *pd = &xd->plane[plane]; | ||||
|       int bw = (mi_step * 8) >> pd->subsampling_x; | ||||
|       int bw = (mi_step * MI_SIZE) >> pd->subsampling_x; | ||||
|       int bh = overlap >> pd->subsampling_y; | ||||
|       int row, col; | ||||
|       int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; | ||||
|       uint8_t *dst = use_tmp_dst_buf ? | ||||
|           &final_buf[plane][(i * 8) >> pd->subsampling_x] : | ||||
|           &pd->dst.buf[(i * 8) >> pd->subsampling_x]; | ||||
|           &final_buf[plane][(i * MI_SIZE) >> pd->subsampling_x] : | ||||
|           &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x]; | ||||
|       int tmp_stride = tmp_stride1[plane]; | ||||
|       uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x]; | ||||
|       uint8_t *tmp = &tmp_buf1[plane][(i * MI_SIZE) >> pd->subsampling_x]; | ||||
|       const uint8_t *mask[2]; | ||||
|  | ||||
|       setup_obmc_mask(bh, mask); | ||||
| @@ -1285,8 +1325,9 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, | ||||
|  | ||||
|         for (row = 0; row < bh; ++row) { | ||||
|           for (col = 0; col < bw; ++col) | ||||
|             dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col] | ||||
|                           + 32) >> 6; | ||||
|             dst16[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst16[col] + | ||||
|                                             mask[1][row] * tmp16[col], 6); | ||||
|  | ||||
|           dst16 += dst_stride; | ||||
|           tmp16 += tmp_stride; | ||||
|         } | ||||
| @@ -1294,8 +1335,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|       for (row = 0; row < bh; ++row) { | ||||
|         for (col = 0; col < bw; ++col) | ||||
|           dst[col] = (mask[0][row] * dst[col] + mask[1][row] * tmp[col] + 32) | ||||
|                      >> 6; | ||||
|           dst[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst[col] + | ||||
|                                         mask[1][row] * tmp[col], 6); | ||||
|         dst += dst_stride; | ||||
|         tmp += tmp_stride; | ||||
|       } | ||||
| @@ -1332,15 +1373,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, | ||||
|     for (plane = 0; plane < MAX_MB_PLANE; ++plane) { | ||||
|       const struct macroblockd_plane *pd = &xd->plane[plane]; | ||||
|       int bw = overlap >> pd->subsampling_x; | ||||
|       int bh = (mi_step * 8) >> pd->subsampling_y; | ||||
|       int bh = (mi_step * MI_SIZE) >> pd->subsampling_y; | ||||
|       int row, col; | ||||
|       int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; | ||||
|       uint8_t *dst = use_tmp_dst_buf ? | ||||
|           &final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] : | ||||
|           &pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y]; | ||||
|           &final_buf[plane][(i * MI_SIZE * dst_stride) >> pd->subsampling_y] : | ||||
|           &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y]; | ||||
|       int tmp_stride = tmp_stride2[plane]; | ||||
|       uint8_t *tmp = &tmp_buf2[plane] | ||||
|                               [(i * 8 * tmp_stride) >> pd->subsampling_y]; | ||||
|                               [(i * MI_SIZE * tmp_stride) >> pd->subsampling_y]; | ||||
|       const uint8_t *mask[2]; | ||||
|  | ||||
|       setup_obmc_mask(bw, mask); | ||||
| @@ -1352,8 +1393,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, | ||||
|  | ||||
|         for (row = 0; row < bh; ++row) { | ||||
|           for (col = 0; col < bw; ++col) | ||||
|             dst16[col] = (mask[0][col] * dst16[col] + mask[1][col] * tmp16[col] | ||||
|                           + 32) >> 6; | ||||
|             dst16[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst16[col] + | ||||
|                                             mask[1][col] * tmp16[col], 6); | ||||
|           dst16 += dst_stride; | ||||
|           tmp16 += tmp_stride; | ||||
|         } | ||||
| @@ -1361,8 +1402,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|       for (row = 0; row < bh; ++row) { | ||||
|         for (col = 0; col < bw; ++col) | ||||
|           dst[col] = (mask[0][col] * dst[col] + mask[1][col] * tmp[col] + 32) | ||||
|                      >> 6; | ||||
|           dst[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst[col] + | ||||
|                                         mask[1][col] * tmp[col], 6); | ||||
|         dst += dst_stride; | ||||
|         tmp += tmp_stride; | ||||
|       } | ||||
| @@ -1572,7 +1613,31 @@ static void combine_interintra(PREDICTION_MODE mode, | ||||
|   static const int scale_bits = 8; | ||||
|   static const int scale_max = 256; | ||||
|   static const int scale_round = 127; | ||||
|   static const int weights1d[64] = { | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // TODO(debargha): Fill in the correct weights for 128 wide blocks. | ||||
|   static const int weights1d[MAX_SB_SIZE] = { | ||||
|       128, 128, 125, 125, 122, 122, 119, 119, | ||||
|       116, 116, 114, 114, 111, 111, 109, 109, | ||||
|       107, 107, 105, 105, 103, 103, 101, 101, | ||||
|        99,  99,  97,  97,  96,  96,  94,  94, | ||||
|        93,  93,  91,  91,  90,  90,  89,  89, | ||||
|        88,  88,  86,  86,  85,  85,  84,  84, | ||||
|        83,  83,  82,  82,  81,  81,  81,  81, | ||||
|        80,  80,  79,  79,  78,  78,  78,  78, | ||||
|        77,  77,  76,  76,  76,  76,  75,  75, | ||||
|        75,  75,  74,  74,  74,  74,  73,  73, | ||||
|        73,  73,  72,  72,  72,  72,  71,  71, | ||||
|        71,  71,  71,  71,  70,  70,  70,  70, | ||||
|        70,  70,  70,  70,  69,  69,  69,  69, | ||||
|        69,  69,  69,  69,  68,  68,  68,  68, | ||||
|        68,  68,  68,  68,  68,  68,  67,  67, | ||||
|        67,  67,  67,  67,  67,  67,  67,  67, | ||||
|   }; | ||||
|   static int size_scales[BLOCK_SIZES] = { | ||||
|       32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 | ||||
|   }; | ||||
| #else | ||||
|   static const int weights1d[MAX_SB_SIZE] = { | ||||
|       128, 125, 122, 119, 116, 114, 111, 109, | ||||
|       107, 105, 103, 101,  99,  97,  96,  94, | ||||
|        93,  91,  90,  89,  88,  86,  85,  84, | ||||
| @@ -1582,14 +1647,14 @@ static void combine_interintra(PREDICTION_MODE mode, | ||||
|        70,  70,  69,  69,  69,  69,  68,  68, | ||||
|        68,  68,  68,  67,  67,  67,  67,  67, | ||||
|   }; | ||||
|   const int bw = 4 << b_width_log2_lookup[plane_bsize]; | ||||
|   const int bh = 4 << b_height_log2_lookup[plane_bsize]; | ||||
|   static int size_scales[BLOCK_SIZES] = { | ||||
|       16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 | ||||
|   }; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   int size = VPXMAX(bw, bh); | ||||
|   int size_scale = (size >= 64 ? 1 : | ||||
|                     size == 32 ? 2 : | ||||
|                     size == 16 ? 4 : | ||||
|                     size == 8  ? 8 : 16); | ||||
|   const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; | ||||
|   const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; | ||||
|   const int size_scale = size_scales[plane_bsize]; | ||||
|   int i, j; | ||||
|  | ||||
|   if (use_wedge_interintra && get_wedge_bits(bsize)) { | ||||
| @@ -1712,7 +1777,31 @@ static void combine_interintra_highbd(PREDICTION_MODE mode, | ||||
|   static const int scale_bits = 8; | ||||
|   static const int scale_max = 256; | ||||
|   static const int scale_round = 127; | ||||
|   static const int weights1d[64] = { | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // TODO(debargha): Fill in the correct weights for 128 wide blocks. | ||||
|   static const int weights1d[MAX_SB_SIZE] = { | ||||
|       128, 128, 125, 125, 122, 122, 119, 119, | ||||
|       116, 116, 114, 114, 111, 111, 109, 109, | ||||
|       107, 107, 105, 105, 103, 103, 101, 101, | ||||
|        99,  99,  97,  97,  96,  96,  94,  94, | ||||
|        93,  93,  91,  91,  90,  90,  89,  89, | ||||
|        88,  88,  86,  86,  85,  85,  84,  84, | ||||
|        83,  83,  82,  82,  81,  81,  81,  81, | ||||
|        80,  80,  79,  79,  78,  78,  78,  78, | ||||
|        77,  77,  76,  76,  76,  76,  75,  75, | ||||
|        75,  75,  74,  74,  74,  74,  73,  73, | ||||
|        73,  73,  72,  72,  72,  72,  71,  71, | ||||
|        71,  71,  71,  71,  70,  70,  70,  70, | ||||
|        70,  70,  70,  70,  69,  69,  69,  69, | ||||
|        69,  69,  69,  69,  68,  68,  68,  68, | ||||
|        68,  68,  68,  68,  68,  68,  67,  67, | ||||
|        67,  67,  67,  67,  67,  67,  67,  67, | ||||
|   }; | ||||
|   static int size_scales[BLOCK_SIZES] = { | ||||
|       32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 | ||||
|   }; | ||||
| #else | ||||
|   static const int weights1d[MAX_SB_SIZE] = { | ||||
|       128, 125, 122, 119, 116, 114, 111, 109, | ||||
|       107, 105, 103, 101,  99,  97,  96,  94, | ||||
|        93,  91,  90,  89,  88,  86,  85,  84, | ||||
| @@ -1722,15 +1811,16 @@ static void combine_interintra_highbd(PREDICTION_MODE mode, | ||||
|        70,  70,  69,  69,  69,  69,  68,  68, | ||||
|        68,  68,  68,  67,  67,  67,  67,  67, | ||||
|   }; | ||||
|   const int bw = 4 << b_width_log2_lookup[plane_bsize]; | ||||
|   const int bh = 4 << b_height_log2_lookup[plane_bsize]; | ||||
|   static int size_scales[BLOCK_SIZES] = { | ||||
|       16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 | ||||
|   }; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   int size = VPXMAX(bw, bh); | ||||
|   int size_scale = (size >= 64 ? 1 : | ||||
|                     size == 32 ? 2 : | ||||
|                     size == 16 ? 4 : | ||||
|                     size == 8  ? 8 : 16); | ||||
|   const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; | ||||
|   const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; | ||||
|   const int size_scale = size_scales[plane_bsize]; | ||||
|   int i, j; | ||||
|  | ||||
|   uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8); | ||||
|   uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8); | ||||
|   uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8); | ||||
| @@ -1889,8 +1979,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, | ||||
|   const int bw = 4 << b_width_log2_lookup[bsize]; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     DECLARE_ALIGNED(16, uint16_t, | ||||
|                     intrapredictor[CU_SIZE * CU_SIZE]); | ||||
|     DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); | ||||
|     build_intra_predictors_for_interintra( | ||||
|         xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, | ||||
|         CONVERT_TO_BYTEPTR(intrapredictor), bw, | ||||
| @@ -1907,7 +1996,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, | ||||
|   } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|   { | ||||
|     uint8_t intrapredictor[CU_SIZE * CU_SIZE]; | ||||
|     uint8_t intrapredictor[MAX_SB_SQUARE]; | ||||
|     build_intra_predictors_for_interintra( | ||||
|         xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, | ||||
|         intrapredictor, bw, | ||||
| @@ -1931,8 +2020,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, | ||||
|   const int bw = 4 << b_width_log2_lookup[uvbsize]; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     DECLARE_ALIGNED(16, uint16_t, | ||||
|                     uintrapredictor[CU_SIZE * CU_SIZE]); | ||||
|     DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]); | ||||
|     build_intra_predictors_for_interintra( | ||||
|         xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, | ||||
|         CONVERT_TO_BYTEPTR(uintrapredictor), bw, | ||||
| @@ -1950,7 +2038,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, | ||||
|   } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|   { | ||||
|     uint8_t uintrapredictor[CU_SIZE * CU_SIZE]; | ||||
|     uint8_t uintrapredictor[MAX_SB_SQUARE]; | ||||
|     build_intra_predictors_for_interintra( | ||||
|         xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, | ||||
|         uintrapredictor, bw, | ||||
| @@ -2117,30 +2205,30 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, | ||||
|     if (ref && get_wedge_bits(mi->mbmi.sb_type) | ||||
|         && mi->mbmi.use_wedge_interinter) { | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|       uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; | ||||
|       uint8_t tmp_dst_[2 * MAX_SB_SQUARE]; | ||||
|       uint8_t *tmp_dst = | ||||
|           (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? | ||||
|           CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; | ||||
| #else | ||||
|       uint8_t tmp_dst[CU_SIZE * CU_SIZE]; | ||||
|       uint8_t tmp_dst[MAX_SB_SQUARE]; | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|           int k; | ||||
|           for (k = 0; k < h; ++k) | ||||
|             memcpy(tmp_dst_ + 2 * CU_SIZE * k, ext_dst1 + | ||||
|             memcpy(tmp_dst_ + 2 * MAX_SB_SIZE * k, ext_dst1 + | ||||
|                    ext_dst_stride1 * 2 * k, w * 2); | ||||
|         } else { | ||||
|           int k; | ||||
|           for (k = 0; k < h; ++k) | ||||
|             memcpy(tmp_dst_ + CU_SIZE * k, ext_dst1 + | ||||
|             memcpy(tmp_dst_ + MAX_SB_SIZE * k, ext_dst1 + | ||||
|                    ext_dst_stride1 * k, w); | ||||
|         } | ||||
| #else | ||||
|         { | ||||
|           int k; | ||||
|           for (k = 0; k < h; ++k) | ||||
|             memcpy(tmp_dst + CU_SIZE * k, ext_dst1 + | ||||
|             memcpy(tmp_dst + MAX_SB_SIZE * k, ext_dst1 + | ||||
|                    ext_dst_stride1 * k, w); | ||||
|         } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
| @@ -2149,20 +2237,20 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|         build_masked_compound_extend_highbd( | ||||
|             dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, | ||||
|             dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane, | ||||
|             mi->mbmi.interinter_wedge_index, | ||||
|             mi->mbmi.sb_type, | ||||
|             wedge_offset_y, wedge_offset_x, h, w); | ||||
|       } else { | ||||
|         build_masked_compound_extend( | ||||
|             dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, | ||||
|             dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane, | ||||
|             mi->mbmi.interinter_wedge_index, | ||||
|             mi->mbmi.sb_type, | ||||
|             wedge_offset_y, wedge_offset_x, h, w); | ||||
|       } | ||||
| #else | ||||
|       build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, | ||||
|                                    CU_SIZE, plane, | ||||
|                                    MAX_SB_SIZE, plane, | ||||
|                                    mi->mbmi.interinter_wedge_index, | ||||
|                                    mi->mbmi.sb_type, | ||||
|                                    wedge_offset_y, wedge_offset_x, h, w); | ||||
| @@ -2171,12 +2259,12 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) | ||||
|         build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, | ||||
|                                      CU_SIZE, | ||||
|                                      MAX_SB_SIZE, | ||||
|                                      mi->mbmi.interinter_wedge_index, | ||||
|                                      mi->mbmi.sb_type, h, w); | ||||
|       else | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|         build_masked_compound(dst, dst_buf->stride, tmp_dst, CU_SIZE, | ||||
|         build_masked_compound(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, | ||||
|                               mi->mbmi.interinter_wedge_index, | ||||
|                               mi->mbmi.sb_type, h, w); | ||||
| #endif  // CONFIG_SUPERTX | ||||
|   | ||||
| @@ -443,8 +443,8 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, | ||||
| #endif  // CONFIG_OBMC | ||||
|  | ||||
| #if CONFIG_EXT_INTER | ||||
| #define MASK_MASTER_SIZE   (2 * CU_SIZE) | ||||
| #define MASK_MASTER_STRIDE (2 * CU_SIZE) | ||||
| #define MASK_MASTER_SIZE   (2 * MAX_SB_SIZE) | ||||
| #define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE) | ||||
|  | ||||
| void vp10_init_wedge_masks(); | ||||
|  | ||||
|   | ||||
| @@ -44,30 +44,30 @@ static const uint8_t extend_modes[INTRA_MODES] = { | ||||
|   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // TM | ||||
| }; | ||||
|  | ||||
| static const uint8_t orders_64x64[1] = { 0 }; | ||||
| static const uint8_t orders_64x32[2] = { 0, 1 }; | ||||
| static const uint8_t orders_32x64[2] = { 0, 1 }; | ||||
| static const uint8_t orders_32x32[4] = { | ||||
| static const uint8_t orders_128x128[1] = { 0 }; | ||||
| static const uint8_t orders_128x64[2] = { 0, 1 }; | ||||
| static const uint8_t orders_64x128[2] = { 0, 1 }; | ||||
| static const uint8_t orders_64x64[4] = {  /* relative coding order of a 2x2 grid of blocks, stored row by row */ | ||||
|   0, 1, | ||||
|   2, 3, | ||||
| }; | ||||
| static const uint8_t orders_32x16[8] = { | ||||
| static const uint8_t orders_64x32[8] = {  /* relative coding order of a grid 2 blocks wide and 4 blocks tall, stored row by row */ | ||||
|   0, 2, | ||||
|   1, 3, | ||||
|   4, 6, | ||||
|   5, 7, | ||||
| }; | ||||
| static const uint8_t orders_16x32[8] = { | ||||
| static const uint8_t orders_32x64[8] = {  /* relative coding order of a grid 4 blocks wide and 2 blocks tall, stored row by row */ | ||||
|   0, 1, 2, 3, | ||||
|   4, 5, 6, 7, | ||||
| }; | ||||
| static const uint8_t orders_16x16[16] = { | ||||
| static const uint8_t orders_32x32[16] = {  /* relative coding order of a 4x4 grid of blocks, stored row by row */ | ||||
|   0,   1,  4,  5, | ||||
|   2,   3,  6,  7, | ||||
|   8,   9, 12, 13, | ||||
|   10, 11, 14, 15, | ||||
| }; | ||||
| static const uint8_t orders_16x8[32] = { | ||||
| static const uint8_t orders_32x16[32] = { | ||||
|   0,   2,  8, 10, | ||||
|   1,   3,  9, 11, | ||||
|   4,   6, 12, 14, | ||||
| @@ -77,13 +77,13 @@ static const uint8_t orders_16x8[32] = { | ||||
|   20, 22, 28, 30, | ||||
|   21, 23, 29, 31, | ||||
| }; | ||||
| static const uint8_t orders_8x16[32] = { | ||||
| static const uint8_t orders_16x32[32] = {  /* relative coding order of a grid 8 blocks wide and 4 blocks tall, stored row by row */ | ||||
|   0,   1,  2,  3,  8,  9, 10, 11, | ||||
|   4,   5,  6,  7, 12, 13, 14, 15, | ||||
|   16, 17, 18, 19, 24, 25, 26, 27, | ||||
|   20, 21, 22, 23, 28, 29, 30, 31, | ||||
| }; | ||||
| static const uint8_t orders_8x8[64] = { | ||||
| static const uint8_t orders_16x16[64] = { | ||||
|   0,   1,  4,  5, 16, 17, 20, 21, | ||||
|   2,   3,  6,  7, 18, 19, 22, 23, | ||||
|   8,   9, 12, 13, 24, 25, 28, 29, | ||||
| @@ -93,24 +93,96 @@ static const uint8_t orders_8x8[64] = { | ||||
|   40, 41, 44, 45, 56, 57, 60, 61, | ||||
|   42, 43, 46, 47, 58, 59, 62, 63, | ||||
| }; | ||||
| static const uint8_t *const orders[BLOCK_SIZES] = { | ||||
|   orders_8x8, orders_8x8, orders_8x8, orders_8x8, | ||||
|   orders_8x16, orders_16x8, orders_16x16, | ||||
|   orders_16x32, orders_32x16, orders_32x32, | ||||
|   orders_32x64, orders_64x32, orders_64x64, | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
| static const uint8_t orders_16x8[128] = {  /* relative coding order of a grid 8 blocks wide and 16 blocks tall, stored row by row; only built with CONFIG_EXT_PARTITION */ | ||||
|   0,   2,  8, 10,  32,  34,  40,  42, | ||||
|   1,   3,  9, 11,  33,  35,  41,  43, | ||||
|   4,   6, 12, 14,  36,  38,  44,  46, | ||||
|   5,   7, 13, 15,  37,  39,  45,  47, | ||||
|   16, 18, 24, 26,  48,  50,  56,  58, | ||||
|   17, 19, 25, 27,  49,  51,  57,  59, | ||||
|   20, 22, 28, 30,  52,  54,  60,  62, | ||||
|   21, 23, 29, 31,  53,  55,  61,  63, | ||||
|   64, 66, 72, 74,  96,  98, 104, 106, | ||||
|   65, 67, 73, 75,  97,  99, 105, 107, | ||||
|   68, 70, 76, 78, 100, 102, 108, 110, | ||||
|   69, 71, 77, 79, 101, 103, 109, 111, | ||||
|   80, 82, 88, 90, 112, 114, 120, 122, | ||||
|   81, 83, 89, 91, 113, 115, 121, 123, | ||||
|   84, 86, 92, 94, 116, 118, 124, 126, | ||||
|   85, 87, 93, 95, 117, 119, 125, 127, | ||||
| }; | ||||
| static const uint8_t orders_8x16[128] = {  /* relative coding order of a grid 16 blocks wide and 8 blocks tall, stored row by row; only built with CONFIG_EXT_PARTITION */ | ||||
|   0,   1,  2,  3,  8,  9, 10, 11,  32,  33,  34,  35,  40,  41,  42,  43, | ||||
|   4,   5,  6,  7, 12, 13, 14, 15,  36,  37,  38,  39,  44,  45,  46,  47, | ||||
|   16, 17, 18, 19, 24, 25, 26, 27,  48,  49,  50,  51,  56,  57,  58,  59, | ||||
|   20, 21, 22, 23, 28, 29, 30, 31,  52,  53,  54,  55,  60,  61,  62,  63, | ||||
|   64, 65, 66, 67, 72, 73, 74, 75,  96,  97,  98,  99, 104, 105, 106, 107, | ||||
|   68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111, | ||||
|   80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123, | ||||
|   84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127, | ||||
| }; | ||||
| static const uint8_t orders_8x8[256] = {  /* relative coding order of a 16x16 grid of blocks, stored row by row; only built with CONFIG_EXT_PARTITION */ | ||||
| 0,     1,   4,   5,  16,  17,  20,  21,  64,  65,  68,  69,  80,  81,  84,  85, | ||||
| 2,     3,   6,   7,  18,  19,  22,  23,  66,  67,  70,  71,  82,  83,  86,  87, | ||||
| 8,     9,  12,  13,  24,  25,  28,  29,  72,  73,  76,  77,  88,  89,  92,  93, | ||||
| 10,   11,  14,  15,  26,  27,  30,  31,  74,  75,  78,  79,  90,  91,  94,  95, | ||||
| 32,   33,  36,  37,  48,  49,  52,  53,  96,  97, 100, 101, 112, 113, 116, 117, | ||||
| 34,   35,  38,  39,  50,  51,  54,  55,  98,  99, 102, 103, 114, 115, 118, 119, | ||||
| 40,   41,  44,  45,  56,  57,  60,  61, 104, 105, 108, 109, 120, 121, 124, 125, | ||||
| 42,   43,  46,  47,  58,  59,  62,  63, 106, 107, 110, 111, 122, 123, 126, 127, | ||||
| 128, 129, 132, 133, 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213, | ||||
| 130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, | ||||
| 136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, | ||||
| 138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, | ||||
| 160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, | ||||
| 162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, | ||||
| 168, 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, | ||||
| 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255, | ||||
| }; | ||||
|  | ||||
| static const uint8_t *const orders[BLOCK_SIZES] = {  /* per-block-size coding-order table, indexed by BLOCK_SIZE; the three sizes below 8X8 reuse orders_8x8 */ | ||||
|   //                              4X4 | ||||
|                                   orders_8x8, | ||||
|   // 4X8,         8X4,            8X8 | ||||
|   orders_8x8,     orders_8x8,     orders_8x8, | ||||
|   // 8X16,        16X8,           16X16 | ||||
|   orders_8x16,    orders_16x8,    orders_16x16, | ||||
|   // 16X32,       32X16,          32X32 | ||||
|   orders_16x32,   orders_32x16,   orders_32x32, | ||||
|   // 32X64,       64X32,          64X64 | ||||
|   orders_32x64,   orders_64x32,   orders_64x64, | ||||
|   // 64x128,      128x64,         128x128 | ||||
|   orders_64x128,  orders_128x64,  orders_128x128 | ||||
| }; | ||||
| #else | ||||
| static const uint8_t *const orders[BLOCK_SIZES] = {  /* without CONFIG_EXT_PARTITION every slot holds the table named one size step larger (the 64X64 slot holds orders_128x128); presumably because the table names assume a 128x128 superblock - TODO confirm */ | ||||
|   //                              4X4 | ||||
|                                   orders_16x16, | ||||
|   // 4X8,         8X4,            8X8 | ||||
|   orders_16x16,   orders_16x16,   orders_16x16, | ||||
|   // 8X16,        16X8,           16X16 | ||||
|   orders_16x32,   orders_32x16,   orders_32x32, | ||||
|   // 16X32,       32X16,          32X32 | ||||
|   orders_32x64,   orders_64x32,   orders_64x64, | ||||
|   // 32X64,       64X32,          64X64 | ||||
|   orders_64x128,  orders_128x64,  orders_128x128 | ||||
| }; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION_TYPES | ||||
| static const uint8_t orders_verta_32x32[4] = { | ||||
| static const uint8_t orders_verta_64x64[4] = {  /* 2x2 grid stored row by row; both right-hand cells share order 2 - presumably the VERT_A partition shape, TODO confirm */ | ||||
|   0, 2, | ||||
|   1, 2, | ||||
| }; | ||||
| static const uint8_t orders_verta_16x16[16] = { | ||||
| static const uint8_t orders_verta_32x32[16] = {  /* 4x4 grid stored row by row; within each 2x2 group the two right-hand cells share one order value */ | ||||
|   0,   2,  4,  6, | ||||
|   1,   2,  5,  6, | ||||
|   8,  10, 12, 14, | ||||
|   9,  10, 13, 14, | ||||
| }; | ||||
| static const uint8_t orders_verta_8x8[64] = { | ||||
| static const uint8_t orders_verta_16x16[64] = { | ||||
|   0,   2,  4,  6, 16, 18, 20, 22, | ||||
|   1,   2,  5,  6, 17, 18, 21, 22, | ||||
|   8,  10, 12, 14, 24, 26, 28, 30, | ||||
| @@ -120,12 +192,53 @@ static const uint8_t orders_verta_8x8[64] = { | ||||
|   40, 42, 44, 46, 56, 58, 60, 62, | ||||
|   41, 42, 45, 46, 57, 58, 61, 62, | ||||
| }; | ||||
| static const uint8_t *const orders_verta[BLOCK_SIZES] = { | ||||
|   orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, | ||||
|   orders_8x16, orders_16x8, orders_verta_16x16, | ||||
|   orders_16x32, orders_32x16, orders_verta_32x32, | ||||
|   orders_32x64, orders_64x32, orders_64x64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
| static const uint8_t orders_verta_8x8[256] = {  /* 16x16 grid stored row by row; within each 2x2 group the two right-hand cells share one order value; only built with CONFIG_EXT_PARTITION */ | ||||
| 0,     2,   4,   6,  16,  18,  20,  22,  64,  66,  68,  70,  80,  82,  84,  86, | ||||
| 1,     2,   5,   6,  17,  18,  21,  22,  65,  66,  69,  70,  81,  82,  85,  86, | ||||
| 8,    10,  12,  14,  24,  26,  28,  30,  72,  74,  76,  78,  88,  90,  92,  94, | ||||
| 9,    10,  13,  14,  25,  26,  29,  30,  73,  74,  77,  78,  89,  90,  93,  94, | ||||
| 32,   34,  36,  38,  48,  50,  52,  54,  96,  98, 100, 102, 112, 114, 116, 118, | ||||
| 33,   34,  37,  38,  49,  50,  53,  54,  97,  98, 101, 102, 113, 114, 117, 118, | ||||
| 40,   42,  44,  46,  56,  58,  60,  62, 104, 106, 108, 110, 120, 122, 124, 126, | ||||
| 41,   42,  45,  46,  57,  58,  61,  62, 105, 106, 109, 110, 121, 122, 125, 126, | ||||
| 128, 130, 132, 134, 144, 146, 148, 150, 192, 194, 196, 198, 208, 210, 212, 214, | ||||
| 129, 130, 133, 134, 145, 146, 149, 150, 193, 194, 197, 198, 209, 210, 213, 214, | ||||
| 136, 138, 140, 142, 152, 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, | ||||
| 137, 138, 141, 142, 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, | ||||
| 160, 162, 164, 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, | ||||
| 161, 162, 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, | ||||
| 168, 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254, | ||||
| 169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253, 254, | ||||
| }; | ||||
| static const uint8_t *const orders_verta[BLOCK_SIZES] = {  /* per-block-size table indexed by BLOCK_SIZE; square sizes up to 64X64 use orders_verta_* tables, every other slot reuses the plain orders_* table */ | ||||
|   //                                  4X4 | ||||
|                                       orders_verta_8x8, | ||||
|   // 4X8,           8X4,              8X8 | ||||
|   orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, | ||||
|   // 8X16,          16X8,             16X16 | ||||
|   orders_8x16,      orders_16x8,      orders_verta_16x16, | ||||
|   // 16X32,         32X16,            32X32 | ||||
|   orders_16x32,     orders_32x16,     orders_verta_32x32, | ||||
|   // 32X64,         64X32,            64X64 | ||||
|   orders_32x64,     orders_64x32,     orders_verta_64x64, | ||||
|   // 64x128,        128x64,           128x128 | ||||
|   orders_64x128,    orders_128x64,    orders_128x128 | ||||
| }; | ||||
| #else | ||||
| static const uint8_t *const orders_verta[BLOCK_SIZES] = {  /* without CONFIG_EXT_PARTITION every slot holds the table named one size step larger; the 64X64 slot falls back to the plain orders_128x128 */ | ||||
|   //                                      4X4 | ||||
|                                           orders_verta_16x16, | ||||
|   // 4X8,             8X4,                8X8 | ||||
|   orders_verta_16x16, orders_verta_16x16, orders_verta_16x16, | ||||
|   // 8X16,            16X8,               16X16 | ||||
|   orders_16x32,       orders_32x16,       orders_verta_32x32, | ||||
|   // 16X32,           32X16,              32X32 | ||||
|   orders_32x64,       orders_64x32,       orders_verta_64x64, | ||||
|   // 32X64,           64X32,              64X64 | ||||
|   orders_64x128,      orders_128x64,      orders_128x128 | ||||
| }; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| #endif  // CONFIG_EXT_PARTITION_TYPES | ||||
|  | ||||
| static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, | ||||
| @@ -154,24 +267,26 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, | ||||
|         order = orders_verta[bsize]; | ||||
|       else | ||||
| #endif  // CONFIG_EXT_PARTITION_TYPES | ||||
|         order = orders[bsize]; | ||||
|       order = orders[bsize]; | ||||
|  | ||||
|       if (x + step < w) | ||||
|         return 1; | ||||
|  | ||||
|       mi_row = (mi_row & 7) >> hl; | ||||
|       mi_col = (mi_col & 7) >> wl; | ||||
|       mi_row = (mi_row & MI_MASK) >> hl; | ||||
|       mi_col = (mi_col & MI_MASK) >> wl; | ||||
|  | ||||
|       // If top row of coding unit | ||||
|       if (mi_row == 0) | ||||
|         return 1; | ||||
|  | ||||
|       // If rightmost column of coding unit | ||||
|       if (((mi_col + 1) << wl) >= 8) | ||||
|       if (((mi_col + 1) << wl) >= MI_BLOCK_SIZE) | ||||
|         return 0; | ||||
|  | ||||
|       my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0]; | ||||
|       tr_order = order[((mi_row - 1) << (3 - wl)) + mi_col + 1]; | ||||
|       my_order = | ||||
|         order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0]; | ||||
|       tr_order = | ||||
|         order[((mi_row - 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 1]; | ||||
|  | ||||
|       return my_order > tr_order; | ||||
|     } else { | ||||
| @@ -200,17 +315,17 @@ static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col, | ||||
|     if (y + step < h) | ||||
|       return 1; | ||||
|  | ||||
|     mi_row = (mi_row & 7) >> hl; | ||||
|     mi_col = (mi_col & 7) >> wl; | ||||
|     mi_row = (mi_row & MI_MASK) >> hl; | ||||
|     mi_col = (mi_col & MI_MASK) >> wl; | ||||
|  | ||||
|     if (mi_col == 0) | ||||
|       return (mi_row << (hl + !ss_y)) + y + step < (8 << !ss_y); | ||||
|       return (mi_row << (hl + !ss_y)) + y + step < (MI_BLOCK_SIZE << !ss_y); | ||||
|  | ||||
|     if (((mi_row + 1) << hl) >= 8) | ||||
|     if (((mi_row + 1) << hl) >= MI_BLOCK_SIZE) | ||||
|       return 0; | ||||
|  | ||||
|     my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0]; | ||||
|     bl_order = order[((mi_row + 1) << (3 - wl)) + mi_col - 1]; | ||||
|     my_order = order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0]; | ||||
|     bl_order = order[((mi_row + 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col - 1]; | ||||
|  | ||||
|     return bl_order < my_order; | ||||
|   } | ||||
| @@ -336,8 +451,8 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs, | ||||
|   if (filter_type != INTRA_FILTER_LINEAR) { | ||||
|     const int pad_size = SUBPEL_TAPS >> 1; | ||||
|     int len; | ||||
|     DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][64]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, src[64 + SUBPEL_TAPS]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]); | ||||
|     uint8_t flags[SUBPEL_SHIFTS]; | ||||
|  | ||||
|     memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); | ||||
| @@ -467,8 +582,8 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, | ||||
|   if (filter_type != INTRA_FILTER_LINEAR) { | ||||
|     const int pad_size = SUBPEL_TAPS >> 1; | ||||
|     int len, i; | ||||
|     DECLARE_ALIGNED(16, uint8_t, buf[64][4 * SUBPEL_SHIFTS]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, src[(64 + SUBPEL_TAPS) * 4]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]); | ||||
|     uint8_t flags[SUBPEL_SHIFTS]; | ||||
|  | ||||
|     memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); | ||||
| @@ -1063,8 +1178,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, | ||||
|   int i; | ||||
|   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | ||||
|   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | ||||
|   DECLARE_ALIGNED(16, uint16_t, left_col[64]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, left_col[MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]); | ||||
|   uint16_t *above_row = above_data + 16; | ||||
|   const uint16_t *const_above_row = above_row; | ||||
|   const int bs = 4 << tx_size; | ||||
| @@ -1220,9 +1335,9 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, | ||||
|                                    int n_left_px, int n_bottomleft_px, | ||||
|                                    int plane) { | ||||
|   int i; | ||||
|   DECLARE_ALIGNED(16, uint8_t, left_col[64]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, left_col[MAX_SB_SIZE]); | ||||
|   const uint8_t *above_ref = ref - ref_stride; | ||||
|   DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]); | ||||
|   uint8_t *above_row = above_data + 16; | ||||
|   const uint8_t *const_above_row = above_row; | ||||
|   const int bs = 4 << tx_size; | ||||
|   | ||||
| @@ -109,6 +109,12 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, | ||||
|     path = LF_PATH_SLOW; | ||||
| #endif  // !CONFIG_EXT_PARTITION_TYPES | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   printf("STOPPING: This code has not been modified to work with the " | ||||
|          "extended coding unit size experiment"); | ||||
|   exit(EXIT_FAILURE); | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   for (mi_row = start; mi_row < stop; | ||||
|        mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { | ||||
|     MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; | ||||
| @@ -176,6 +182,12 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, | ||||
|   const int num_workers = VPXMIN(nworkers, tile_cols); | ||||
|   int i; | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
|       printf("STOPPING: This code has not been modified to work with the " | ||||
|              "extended coding unit size experiment"); | ||||
|       exit(EXIT_FAILURE); | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   if (!lf_sync->sync_range || sb_rows != lf_sync->rows || | ||||
|       num_workers > lf_sync->num_workers) { | ||||
|     vp10_loop_filter_dealloc(lf_sync); | ||||
|   | ||||
| @@ -5,8 +5,8 @@ | ||||
| #include "vpx_dsp/vpx_dsp_common.h" | ||||
| #include "vpx_ports/mem.h" | ||||
|  | ||||
| #define MAX_BLOCK_WIDTH (64) | ||||
| #define MAX_BLOCK_HEIGHT (64) | ||||
| #define MAX_BLOCK_WIDTH (MAX_SB_SIZE) | ||||
| #define MAX_BLOCK_HEIGHT (MAX_SB_SIZE) | ||||
| #define MAX_STEP (32) | ||||
| #define MAX_FILTER_TAP (12) | ||||
|  | ||||
|   | ||||
| @@ -489,7 +489,7 @@ static void extend_and_predict_highbd(const uint8_t *buf_ptr1, | ||||
|                                       MACROBLOCKD *xd, | ||||
|                                       int w, int h, int ref, int xs, int ys) { | ||||
|   DECLARE_ALIGNED(16, uint16_t, | ||||
|                   mc_buf_high[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); | ||||
|     mc_buf_high[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]); | ||||
|   const uint8_t *buf_ptr; | ||||
|  | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
| @@ -535,7 +535,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, | ||||
| #endif  // CONFIG_EXT_INTER && CONFIG_SUPERTX | ||||
|                                MACROBLOCKD *xd, | ||||
|                                int w, int h, int ref, int xs, int ys) { | ||||
|   DECLARE_ALIGNED(16, uint8_t, mc_buf[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|     mc_buf[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]); | ||||
|   const uint8_t *buf_ptr; | ||||
|  | ||||
|   build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, | ||||
| @@ -1093,7 +1094,7 @@ static void set_param_topblock(VP10_COMMON *const cm,  MACROBLOCKD *const xd, | ||||
|     } | ||||
| #if CONFIG_VAR_TX | ||||
|   xd->above_txfm_context = cm->above_txfm_context + mi_col; | ||||
|   xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); | ||||
|   xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK); | ||||
|   set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh); | ||||
|   set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw); | ||||
| #endif | ||||
| @@ -1304,38 +1305,38 @@ static void dec_predict_sb_complex(VP10Decoder *const pbi, | ||||
|   uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; | ||||
|  | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|                   tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); | ||||
|                   tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|                   tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); | ||||
|                   tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|                   tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); | ||||
|   int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; | ||||
|   int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; | ||||
|   int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; | ||||
|                   tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); | ||||
|   int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; | ||||
|   int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; | ||||
|   int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; | ||||
|  | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     int len = sizeof(uint16_t); | ||||
|     dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); | ||||
|     dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len); | ||||
|     dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len); | ||||
|     dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); | ||||
|     dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len); | ||||
|     dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len); | ||||
|     dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); | ||||
|     dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len); | ||||
|     dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len); | ||||
|   } else { | ||||
| #endif | ||||
|     dst_buf1[0] = tmp_buf1; | ||||
|     dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE; | ||||
|     dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE; | ||||
|     dst_buf2[0] = tmp_buf2; | ||||
|     dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE; | ||||
|     dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE; | ||||
|     dst_buf3[0] = tmp_buf3; | ||||
|     dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE; | ||||
|     dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   } | ||||
| #endif | ||||
| @@ -1900,39 +1901,37 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, | ||||
|       if (mbmi->obmc) { | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|         DECLARE_ALIGNED(16, uint8_t, | ||||
|                         tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|                         tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|         DECLARE_ALIGNED(16, uint8_t, | ||||
|                         tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|                         tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #else | ||||
|         DECLARE_ALIGNED(16, uint8_t, | ||||
|                         tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|                         tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|         DECLARE_ALIGNED(16, uint8_t, | ||||
|                         tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|                         tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|         uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; | ||||
|         int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; | ||||
|         int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; | ||||
|         int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|         int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|  | ||||
|         assert(mbmi->sb_type >= BLOCK_8X8); | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|           int len = sizeof(uint16_t); | ||||
|           dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); | ||||
|           dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); | ||||
|           dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + | ||||
|                                            CU_SIZE * CU_SIZE * 2 * len); | ||||
|           dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); | ||||
|           dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); | ||||
|           dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); | ||||
|           dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); | ||||
|           dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + | ||||
|                                            CU_SIZE * CU_SIZE * 2 * len); | ||||
|           dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); | ||||
|           dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); | ||||
|         } else { | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|           dst_buf1[0] = tmp_buf1; | ||||
|           dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; | ||||
|           dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; | ||||
|           dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; | ||||
|           dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; | ||||
|           dst_buf2[0] = tmp_buf2; | ||||
|           dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; | ||||
|           dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; | ||||
|           dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; | ||||
|           dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|         } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
| @@ -3281,7 +3280,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, | ||||
| #if CONFIG_ANS | ||||
|                            &td->token_ans, | ||||
| #endif  // CONFIG_ANS | ||||
|                            BLOCK_64X64, 4); | ||||
|                            BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2); | ||||
|         } | ||||
|         pbi->mb.corrupted |= td->xd.corrupted; | ||||
|         if (pbi->mb.corrupted) | ||||
| @@ -3396,7 +3395,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, | ||||
| #if CONFIG_ANS | ||||
|                        &tile_data->token_ans, | ||||
| #endif  // CONFIG_ANS | ||||
|                        BLOCK_64X64, 4); | ||||
|                        BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2); | ||||
|     } | ||||
|   } | ||||
|   return !tile_data->xd.corrupted; | ||||
|   | ||||
| @@ -39,8 +39,8 @@ typedef struct TileData { | ||||
| #endif  // CONFIG_ANS | ||||
|   DECLARE_ALIGNED(16, MACROBLOCKD, xd); | ||||
|   /* dqcoeff are shared by all the planes. So planes must be decoded serially */ | ||||
|   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); | ||||
|   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); | ||||
| } TileData; | ||||
|  | ||||
| typedef struct TileWorkerData { | ||||
| @@ -52,8 +52,8 @@ typedef struct TileWorkerData { | ||||
|   FRAME_COUNTS counts; | ||||
|   DECLARE_ALIGNED(16, MACROBLOCKD, xd); | ||||
|   /* dqcoeff are shared by all the planes. So planes must be decoded serially */ | ||||
|   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); | ||||
|   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); | ||||
|   struct vpx_internal_error_info error_info; | ||||
| } TileWorkerData; | ||||
|  | ||||
|   | ||||
| @@ -62,7 +62,7 @@ static int decode_coefs(const MACROBLOCKD *xd, | ||||
|   const vpx_prob *prob; | ||||
|   unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; | ||||
|   unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; | ||||
|   uint8_t token_cache[32 * 32]; | ||||
|   uint8_t token_cache[MAX_TX_SQUARE]; | ||||
|   const uint8_t *band_translate = get_band_translate(tx_size); | ||||
|   int dq_shift; | ||||
|   int v, token; | ||||
| @@ -245,7 +245,7 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd, | ||||
|   const vpx_prob *prob; | ||||
|   unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; | ||||
|   unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; | ||||
|   uint8_t token_cache[32 * 32]; | ||||
|   uint8_t token_cache[MAX_TX_SQUARE]; | ||||
|   const uint8_t *band_translate = get_band_translate(tx_size); | ||||
|   int dq_shift; | ||||
|   int v, token; | ||||
|   | ||||
| @@ -116,8 +116,8 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs, | ||||
|   VP10_COMMON *const cm = &cpi->common; | ||||
|  | ||||
|   const int mi_offset = mi_row * cm->mi_cols + mi_col; | ||||
|   const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; | ||||
|   const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; | ||||
|   const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; | ||||
|   const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST]; | ||||
|   const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); | ||||
|   const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]); | ||||
|   int x, y; | ||||
|   | ||||
| @@ -415,9 +415,9 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) { | ||||
|     bl_index = mi_row * cm->mi_cols + mi_col; | ||||
|     // Loop through all 8x8 blocks in superblock and update map. | ||||
|     xmis = | ||||
|         VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]); | ||||
|         VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]); | ||||
|     ymis = | ||||
|         VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]); | ||||
|         VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]); | ||||
|     for (y = 0; y < ymis; y++) { | ||||
|       for (x = 0; x < xmis; x++) { | ||||
|         const int bl_index2 = bl_index + y * cm->mi_cols + x; | ||||
|   | ||||
| @@ -32,9 +32,11 @@ static const int segment_id[ENERGY_SPAN] = {0, 1, 1, 2, 3, 4}; | ||||
|  | ||||
| #define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] | ||||
|  | ||||
| DECLARE_ALIGNED(16, static const uint8_t, vp10_64_zeros[64]) = {0}; | ||||
| DECLARE_ALIGNED(16, static const uint8_t, | ||||
|                 vp10_all_zeros[MAX_SB_SIZE]) = {0}; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
| DECLARE_ALIGNED(16, static const uint16_t, vp10_highbd_64_zeros[64]) = {0}; | ||||
| DECLARE_ALIGNED(16, static const uint16_t, | ||||
|                 vp10_highbd_all_zeros[MAX_SB_SIZE]) = {0}; | ||||
| #endif | ||||
|  | ||||
| unsigned int vp10_vaq_segment_id(int energy) { | ||||
| @@ -153,17 +155,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x, | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|       aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, | ||||
|                            CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), 0, bw, bh, | ||||
|                            CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0, bw, bh, | ||||
|                            &sse, &avg); | ||||
|       sse >>= 2 * (xd->bd - 8); | ||||
|       avg >>= (xd->bd - 8); | ||||
|     } else { | ||||
|       aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, | ||||
|                   vp10_64_zeros, 0, bw, bh, &sse, &avg); | ||||
|                   vp10_all_zeros, 0, bw, bh, &sse, &avg); | ||||
|     } | ||||
| #else | ||||
|     aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, | ||||
|                 vp10_64_zeros, 0, bw, bh, &sse, &avg); | ||||
|                 vp10_all_zeros, 0, bw, bh, &sse, &avg); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     var = sse - (((int64_t)avg * avg) / (bw * bh)); | ||||
|     return (256 * var) / (bw * bh); | ||||
| @@ -172,17 +174,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|       var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, | ||||
|                                x->plane[0].src.stride, | ||||
|                                CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), | ||||
|                                CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), | ||||
|                                0, &sse); | ||||
|     } else { | ||||
|       var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, | ||||
|                                x->plane[0].src.stride, | ||||
|                                vp10_64_zeros, 0, &sse); | ||||
|                                vp10_all_zeros, 0, &sse); | ||||
|     } | ||||
| #else | ||||
|     var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, | ||||
|                              x->plane[0].src.stride, | ||||
|                              vp10_64_zeros, 0, &sse); | ||||
|                              vp10_all_zeros, 0, &sse); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     return (256 * var) >> num_pels_log2_lookup[bs]; | ||||
|   } | ||||
|   | ||||
| @@ -1893,7 +1893,7 @@ static void write_modes(VP10_COMP *const cpi, | ||||
|  | ||||
|     for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { | ||||
|       write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0, | ||||
|                              mi_row, mi_col, BLOCK_64X64); | ||||
|                              mi_row, mi_col, BLOCK_LARGEST); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -28,7 +28,7 @@ typedef struct { | ||||
| } diff; | ||||
|  | ||||
| typedef struct macroblock_plane { | ||||
|   DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); | ||||
|   DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]); | ||||
|   tran_low_t *qcoeff; | ||||
|   tran_low_t *coeff; | ||||
|   uint16_t *eobs; | ||||
| @@ -63,10 +63,10 @@ typedef struct { | ||||
| } MB_MODE_INFO_EXT; | ||||
|  | ||||
| typedef struct { | ||||
|   uint8_t best_palette_color_map[4096]; | ||||
|   double kmeans_data_buf[2 * 4096]; | ||||
|   uint8_t kmeans_indices_buf[4096]; | ||||
|   uint8_t kmeans_pre_indices_buf[4096]; | ||||
|   uint8_t best_palette_color_map[MAX_SB_SQUARE]; | ||||
|   double kmeans_data_buf[2 * MAX_SB_SQUARE]; | ||||
|   uint8_t kmeans_indices_buf[MAX_SB_SQUARE]; | ||||
|   uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE]; | ||||
| } PALETTE_BUFFER; | ||||
|  | ||||
| typedef struct macroblock MACROBLOCK; | ||||
| @@ -140,11 +140,11 @@ struct macroblock { | ||||
|  | ||||
|   // Notes transform blocks where no coefficients are coded. | ||||
|   // Set during mode selection. Read during block encoding. | ||||
|   uint8_t zcoeff_blk[TX_SIZES][256]; | ||||
|   uint8_t zcoeff_blk[TX_SIZES][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; | ||||
| #if CONFIG_VAR_TX | ||||
|   uint8_t blk_skip[MAX_MB_PLANE][256]; | ||||
|   uint8_t blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; | ||||
| #if CONFIG_REF_MV | ||||
|   uint8_t blk_skip_drl[MAX_MB_PLANE][256]; | ||||
|   uint8_t blk_skip_drl[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| @@ -164,12 +164,12 @@ struct macroblock { | ||||
|   int quant_fp; | ||||
|  | ||||
|   // skip forward transform and quantization | ||||
|   uint8_t skip_txfm[MAX_MB_PLANE][4]; | ||||
|   uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB]; | ||||
|   #define SKIP_TXFM_NONE 0 | ||||
|   #define SKIP_TXFM_AC_DC 1 | ||||
|   #define SKIP_TXFM_AC_ONLY 2 | ||||
|  | ||||
|   int64_t bsse[MAX_MB_PLANE][4]; | ||||
|   int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB]; | ||||
|  | ||||
|   // Used to store sub partition's choices. | ||||
|   MV pred_mv[MAX_REF_FRAMES]; | ||||
|   | ||||
| @@ -11,11 +11,14 @@ | ||||
| #include "vp10/encoder/context_tree.h" | ||||
| #include "vp10/encoder/encoder.h" | ||||
|  | ||||
| static const BLOCK_SIZE square[] = { | ||||
| static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 2] = { | ||||
|   BLOCK_8X8, | ||||
|   BLOCK_16X16, | ||||
|   BLOCK_32X32, | ||||
|   BLOCK_64X64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   BLOCK_128X128, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, | ||||
| @@ -53,6 +56,14 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, | ||||
|       ctx->eobs_pbuf[i][k]    = ctx->eobs[i][k]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (cm->allow_screen_content_tools) { | ||||
|     for (i = 0;  i < 2; ++i) { | ||||
|       CHECK_MEM_ERROR(cm, ctx->color_index_map[i], | ||||
|                     vpx_memalign(32, | ||||
|                                  num_pix * sizeof(*ctx->color_index_map[i]))); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void free_mode_context(PICK_MODE_CONTEXT *ctx) { | ||||
| @@ -177,8 +188,13 @@ static void free_tree_contexts(PC_TREE *tree) { | ||||
| // represents the state of our search. | ||||
| void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { | ||||
|   int i, j; | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   const int leaf_nodes = 256; | ||||
|   const int tree_nodes = 256 + 64 + 16 + 4 + 1; | ||||
| #else | ||||
|   const int leaf_nodes = 64; | ||||
|   const int tree_nodes = 64 + 16 + 4 + 1; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   int pc_tree_index = 0; | ||||
|   PC_TREE *this_pc; | ||||
|   PICK_MODE_CONTEXT *this_leaf; | ||||
| @@ -217,7 +233,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { | ||||
|  | ||||
|   // Each node has 4 leaf nodes, fill each block_size level of the tree | ||||
|   // from leaves to the root. | ||||
|   for (nodes = 16; nodes > 0; nodes >>= 2) { | ||||
|   for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { | ||||
|     for (i = 0; i < nodes; ++i) { | ||||
|       PC_TREE *const tree = &td->pc_tree[pc_tree_index]; | ||||
|       alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); | ||||
| @@ -233,11 +249,17 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { | ||||
| } | ||||
|  | ||||
| void vp10_free_pc_tree(ThreadData *td) { | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   const int leaf_nodes = 256; | ||||
|   const int tree_nodes = 256 + 64 + 16 + 4 + 1; | ||||
| #else | ||||
|   const int leaf_nodes = 64; | ||||
|   const int tree_nodes = 64 + 16 + 4 + 1; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   int i; | ||||
|  | ||||
|   // Free all 4x4 mode contexts | ||||
|   for (i = 0; i < 64; ++i) | ||||
|   for (i = 0; i < leaf_nodes; ++i) | ||||
|     free_mode_context(&td->leaf_tree[i]); | ||||
|  | ||||
|   // Sets up all the leaf nodes in the tree. | ||||
|   | ||||
| @@ -49,7 +49,6 @@ typedef struct { | ||||
|   // For current partition, only if all Y, U, and V transform blocks' | ||||
|   // coefficients are quantized to 0, skippable is set to 0. | ||||
|   int skippable; | ||||
|   uint8_t skip_txfm[MAX_MB_PLANE << 2]; | ||||
|   int best_mode_index; | ||||
|   int hybrid_pred_diff; | ||||
|   int comp_pred_diff; | ||||
|   | ||||
| @@ -189,7 +189,7 @@ int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride, | ||||
|  | ||||
| static uint8_t *block_start(uint8_t *framebuf, int stride, | ||||
|                             int mi_row, int mi_col) { | ||||
|   return framebuf + (stride * mi_row * 8) + (mi_col * 8); | ||||
|   return framebuf + (stride * mi_row * MI_SIZE) + (mi_col * MI_SIZE); | ||||
| } | ||||
|  | ||||
| static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, | ||||
|   | ||||
| @@ -93,7 +93,16 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, | ||||
| //  purposes of activity masking. | ||||
| // Eventually this should be replaced by custom no-reference routines, | ||||
| //  which will be faster. | ||||
| static const uint8_t VP9_VAR_OFFS[64] = { | ||||
| static const uint8_t VP10_VAR_OFFS[MAX_SB_SIZE] = { | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
| @@ -102,10 +111,20 @@ static const uint8_t VP9_VAR_OFFS[64] = { | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
| static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { | ||||
| static const uint16_t VP10_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = { | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
| @@ -114,9 +133,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128, | ||||
|     128, 128, 128, 128, 128, 128, 128, 128 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { | ||||
| static const uint16_t VP10_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = { | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
| @@ -125,9 +154,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, | ||||
|     128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { | ||||
| static const uint16_t VP10_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = { | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
| @@ -136,6 +175,7 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, | ||||
|     128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
| @@ -144,7 +184,7 @@ unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi, | ||||
|                                            BLOCK_SIZE bs) { | ||||
|   unsigned int sse; | ||||
|   const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, | ||||
|                                               VP9_VAR_OFFS, 0, &sse); | ||||
|                                               VP10_VAR_OFFS, 0, &sse); | ||||
|   return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); | ||||
| } | ||||
|  | ||||
| @@ -155,18 +195,18 @@ unsigned int vp10_high_get_sby_perpixel_variance( | ||||
|   switch (bd) { | ||||
|     case 10: | ||||
|       var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, | ||||
|                                CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), | ||||
|                                CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10), | ||||
|                                0, &sse); | ||||
|       break; | ||||
|     case 12: | ||||
|       var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, | ||||
|                                CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), | ||||
|                                CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12), | ||||
|                                0, &sse); | ||||
|       break; | ||||
|     case 8: | ||||
|     default: | ||||
|       var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, | ||||
|                                CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), | ||||
|                                CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8), | ||||
|                                0, &sse); | ||||
|       break; | ||||
|   } | ||||
| @@ -406,6 +446,13 @@ typedef struct { | ||||
|   v32x32 split[4]; | ||||
| } v64x64; | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION | ||||
| typedef struct { | ||||
|   partition_variance part_variances; | ||||
|   v64x64 split[4]; | ||||
| } v128x128; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
| typedef struct { | ||||
|   partition_variance *part_variances; | ||||
|   var *split[4]; | ||||
| @@ -415,12 +462,24 @@ typedef enum { | ||||
|   V16X16, | ||||
|   V32X32, | ||||
|   V64X64, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   V128X128, | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| } TREE_LEVEL; | ||||
|  | ||||
| static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { | ||||
|   int i; | ||||
|   node->part_variances = NULL; | ||||
|   switch (bsize) { | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     case BLOCK_128X128: { | ||||
|       v128x128 *vt = (v128x128 *) data; | ||||
|       node->part_variances = &vt->part_variances; | ||||
|       for (i = 0; i < 4; i++) | ||||
|         node->split[i] = &vt->split[i].part_variances.none; | ||||
|       break; | ||||
|     } | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|     case BLOCK_64X64: { | ||||
|       v64x64 *vt = (v64x64 *) data; | ||||
|       node->part_variances = &vt->part_variances; | ||||
| @@ -770,7 +829,8 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|   const uint8_t *d; | ||||
|   int sp; | ||||
|   int dp; | ||||
|   int pixels_wide = 64, pixels_high = 64; | ||||
|   int pixels_wide = 8 * num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; | ||||
|   int pixels_high = 8 * num_8x8_blocks_high_lookup[BLOCK_LARGEST]; | ||||
|   int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], | ||||
|       cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; | ||||
|  | ||||
| @@ -781,10 +841,11 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|   int variance4x4downsample[16]; | ||||
|  | ||||
|   int segment_id = CR_SEGMENT_ID_BASE; | ||||
|  | ||||
|   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { | ||||
|     const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : | ||||
|                                                     cm->last_frame_seg_map; | ||||
|     segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); | ||||
|     segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col); | ||||
|  | ||||
|     if (cyclic_refresh_segment_id_boosted(segment_id)) { | ||||
|       int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex); | ||||
| @@ -792,11 +853,12 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|     } | ||||
|   } | ||||
|  | ||||
| #if CONFIG_EXT_PARTITION_TYPES | ||||
|   assert(0); | ||||
| #endif | ||||
| #if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES | ||||
|   printf("Not yet implemented: choose_partitioning\n"); | ||||
|   exit(-1); | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); | ||||
|   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_LARGEST); | ||||
|  | ||||
|   if (xd->mb_to_right_edge < 0) | ||||
|     pixels_wide += (xd->mb_to_right_edge >> 3); | ||||
| @@ -813,8 +875,20 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|  | ||||
|     const YV12_BUFFER_CONFIG *yv12_g = NULL; | ||||
|     unsigned int y_sad, y_sad_g; | ||||
|     const BLOCK_SIZE bsize = BLOCK_32X32 | ||||
|         + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); | ||||
|  | ||||
|     const int max_mi_block_size = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; | ||||
|     const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols; | ||||
|     const int is_left_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows; | ||||
|     BLOCK_SIZE bsize; | ||||
|  | ||||
|     if (is_right_edge && is_left_edge) | ||||
|       bsize = get_subsize(BLOCK_LARGEST, PARTITION_SPLIT); | ||||
|     else if (is_right_edge) | ||||
|       bsize = get_subsize(BLOCK_LARGEST, PARTITION_VERT); | ||||
|     else if (is_left_edge) | ||||
|       bsize = get_subsize(BLOCK_LARGEST, PARTITION_HORZ); | ||||
|     else | ||||
|       bsize = BLOCK_LARGEST; | ||||
|  | ||||
|     assert(yv12 != NULL); | ||||
|     yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); | ||||
| @@ -834,7 +908,7 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|                          &cm->frame_refs[LAST_FRAME - 1].sf); | ||||
|     mbmi->ref_frame[0] = LAST_FRAME; | ||||
|     mbmi->ref_frame[1] = NONE; | ||||
|     mbmi->sb_type = BLOCK_64X64; | ||||
|     mbmi->sb_type = BLOCK_LARGEST; | ||||
|     mbmi->mv[0].as_int = 0; | ||||
|     mbmi->interp_filter = BILINEAR; | ||||
|  | ||||
| @@ -849,7 +923,7 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|       x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv; | ||||
|     } | ||||
|  | ||||
|     vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); | ||||
|     vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_LARGEST); | ||||
|  | ||||
|     for (i = 1; i <= 2; ++i) { | ||||
|       struct macroblock_plane  *p = &x->plane[i]; | ||||
| @@ -868,33 +942,29 @@ static int choose_partitioning(VP10_COMP *cpi, | ||||
|     d = xd->plane[0].dst.buf; | ||||
|     dp = xd->plane[0].dst.stride; | ||||
|  | ||||
|     // If the y_sad is very small, take 64x64 as partition and exit. | ||||
|     // Don't check on boosted segment for now, as 64x64 is suppressed there. | ||||
|     if (segment_id == CR_SEGMENT_ID_BASE && | ||||
|         y_sad < cpi->vbp_threshold_sad) { | ||||
|       const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; | ||||
|       const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; | ||||
|       if (mi_col + block_width / 2 < cm->mi_cols && | ||||
|           mi_row + block_height / 2 < cm->mi_rows) { | ||||
|         set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64); | ||||
|     // If the y_sad is very small, take the largest partition and exit. | ||||
|     // Don't check on boosted segment for now, as largest is suppressed there. | ||||
|     if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { | ||||
|       if (!is_right_edge && !is_left_edge) { | ||||
|         set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_LARGEST); | ||||
|         return 0; | ||||
|       } | ||||
|     } | ||||
|   } else { | ||||
|     d = VP9_VAR_OFFS; | ||||
|     d = VP10_VAR_OFFS; | ||||
|     dp = 0; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|       switch (xd->bd) { | ||||
|         case 10: | ||||
|           d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); | ||||
|           d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); | ||||
|           break; | ||||
|         case 12: | ||||
|           d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); | ||||
|           d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); | ||||
|           break; | ||||
|         case 8: | ||||
|         default: | ||||
|           d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); | ||||
|           d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); | ||||
|           break; | ||||
|       } | ||||
|     } | ||||
| @@ -1699,15 +1769,6 @@ static void rd_pick_sb_modes(VP10_COMP *cpi, | ||||
|     p[i].eobs = ctx->eobs_pbuf[i][0]; | ||||
|   } | ||||
|  | ||||
|   if (cm->current_video_frame == 0 && cm->allow_screen_content_tools) { | ||||
|     for (i = 0; i < 2; ++i) { | ||||
|       if (ctx->color_index_map[i] == 0) { | ||||
|         CHECK_MEM_ERROR(cm, ctx->color_index_map[i], | ||||
|                         vpx_memalign(16, (ctx->num_4x4_blk << 4) * | ||||
|                                      sizeof(*ctx->color_index_map[i]))); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   for (i = 0; i < 2; ++i) | ||||
|     pd[i].color_index_map = ctx->color_index_map[i]; | ||||
|  | ||||
| @@ -2084,17 +2145,16 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| typedef struct { | ||||
|   ENTROPY_CONTEXT a[16 * MAX_MB_PLANE]; | ||||
|   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE]; | ||||
|   PARTITION_CONTEXT sa[8]; | ||||
|   PARTITION_CONTEXT sl[8]; | ||||
|   ENTROPY_CONTEXT a[2 * MI_BLOCK_SIZE * MAX_MB_PLANE]; | ||||
|   ENTROPY_CONTEXT l[2 * MI_BLOCK_SIZE * MAX_MB_PLANE]; | ||||
|   PARTITION_CONTEXT sa[MI_BLOCK_SIZE]; | ||||
|   PARTITION_CONTEXT sl[MI_BLOCK_SIZE]; | ||||
| #if CONFIG_VAR_TX | ||||
|   TXFM_CONTEXT *p_ta; | ||||
|   TXFM_CONTEXT *p_tl; | ||||
|   TXFM_CONTEXT ta[8]; | ||||
|   TXFM_CONTEXT tl[8]; | ||||
|   TXFM_CONTEXT ta[MI_BLOCK_SIZE]; | ||||
|   TXFM_CONTEXT tl[MI_BLOCK_SIZE]; | ||||
| #endif | ||||
| } RD_SEARCH_MACROBLOCK_CONTEXT; | ||||
|  | ||||
| @@ -2892,11 +2952,11 @@ static void rd_use_partition(VP10_COMP *cpi, | ||||
|  | ||||
|   // We must have chosen a partitioning and encoding or we'll fail later on. | ||||
|   // No other opportunities for success. | ||||
|   if (bsize == BLOCK_64X64) | ||||
|   if (bsize == BLOCK_LARGEST) | ||||
|     assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); | ||||
|  | ||||
|   if (do_recon) { | ||||
|     int output_enabled = (bsize == BLOCK_64X64); | ||||
|     int output_enabled = (bsize == BLOCK_LARGEST); | ||||
|     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, | ||||
|               pc_tree); | ||||
|   } | ||||
| @@ -2909,21 +2969,38 @@ static void rd_use_partition(VP10_COMP *cpi, | ||||
| } | ||||
|  | ||||
| static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { | ||||
|   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4, | ||||
|   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4, | ||||
|   BLOCK_8X8,   BLOCK_8X8,   BLOCK_8X8, | ||||
|   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, | ||||
|   BLOCK_16X16 | ||||
|                               BLOCK_4X4,    //                     4x4 | ||||
|     BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,    //    4x8,    8x4,     8x8 | ||||
|     BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,    //   8x16,   16x8,   16x16 | ||||
|     BLOCK_8X8,   BLOCK_8X8, BLOCK_16X16,    //  16x32,  32x16,   32x32 | ||||
|   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,    //  32x64,  64x32,   64x64 | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16     // 64x128, 128x64, 128x128 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { | ||||
|   BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, | ||||
|   BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, | ||||
|   BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, | ||||
|   BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, | ||||
|   BLOCK_64X64 | ||||
|                                BLOCK_8X8,   //                     4x4 | ||||
|   BLOCK_16X16, BLOCK_16X16,  BLOCK_16X16,   //    4x8,    8x4,     8x8 | ||||
|   BLOCK_32X32, BLOCK_32X32,  BLOCK_32X32,   //   8x16,   16x8,   16x16 | ||||
|   BLOCK_64X64, BLOCK_64X64,  BLOCK_64X64,   //  16x32,  32x16,   32x32 | ||||
|   BLOCK_64X64, BLOCK_64X64,  BLOCK_64X64,   //  32x64,  64x32,   64x64 | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   BLOCK_64X64, BLOCK_64X64, BLOCK_128X128   // 64x128, 128x64, 128x128 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| // Next square block size less than or equal to the current block size. | ||||
| static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { | ||||
|                                 BLOCK_4X4,  //                     4x4 | ||||
|     BLOCK_4X4,   BLOCK_4X4,     BLOCK_8X8,  //    4x8,    8x4,     8x8 | ||||
|     BLOCK_8X8,   BLOCK_8X8,   BLOCK_16X16,  //   8x16,   16x8,   16x16 | ||||
|   BLOCK_16X16, BLOCK_16X16,   BLOCK_32X32,  //  16x32,  32x16,   32x32 | ||||
|   BLOCK_32X32, BLOCK_32X32,   BLOCK_64X64,  //  32x64,  64x32,   64x64 | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   BLOCK_64X64, BLOCK_64X64, BLOCK_128X128   // 64x128, 128x64, 128x128 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| // Look at all the mode_info entries for blocks that are part of this | ||||
| // partition and find the min and max values for sb_type. | ||||
| @@ -2954,15 +3031,6 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Next square block size less or equal than current block size. | ||||
| static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { | ||||
|   BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, | ||||
|   BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, | ||||
|   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, | ||||
|   BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, | ||||
|   BLOCK_64X64 | ||||
| }; | ||||
|  | ||||
| // Look at neighboring blocks and set a min and max partition size based on | ||||
| // what they chose. | ||||
| static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile, | ||||
| @@ -2978,13 +3046,13 @@ static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile, | ||||
|   const int col8x8_remaining = tile->mi_col_end - mi_col; | ||||
|   int bh, bw; | ||||
|   BLOCK_SIZE min_size = BLOCK_4X4; | ||||
|   BLOCK_SIZE max_size = BLOCK_64X64; | ||||
|   BLOCK_SIZE max_size = BLOCK_LARGEST; | ||||
|   int bs_hist[BLOCK_SIZES] = {0}; | ||||
|  | ||||
|   // Trap case where we do not have a prediction. | ||||
|   if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { | ||||
|     // Default "min to max" and "max to min" | ||||
|     min_size = BLOCK_64X64; | ||||
|     min_size = BLOCK_LARGEST; | ||||
|     max_size = BLOCK_4X4; | ||||
|  | ||||
|     // NOTE: each call to get_sb_partition_size_range() uses the previous | ||||
| @@ -3054,7 +3122,7 @@ static void set_partition_range(VP10_COMMON *cm, MACROBLOCKD *xd, | ||||
|   MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; | ||||
|   BLOCK_SIZE bs, min_size, max_size; | ||||
|  | ||||
|   min_size = BLOCK_64X64; | ||||
|   min_size = BLOCK_LARGEST; | ||||
|   max_size = BLOCK_4X4; | ||||
|  | ||||
|   if (prev_mi) { | ||||
| @@ -3104,16 +3172,27 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { | ||||
| } | ||||
|  | ||||
| #if CONFIG_FP_MB_STATS | ||||
| const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4}; | ||||
| const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4}; | ||||
| const int qindex_skip_threshold_lookup[BLOCK_SIZES] = | ||||
|   {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120}; | ||||
|   {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // TODO(debargha): What are the correct numbers here? | ||||
|   130, 130, 150 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }; | ||||
| const int qindex_split_threshold_lookup[BLOCK_SIZES] = | ||||
|   {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120}; | ||||
|   {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // TODO(debargha): What are the correct numbers here? | ||||
|   160, 160, 240 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }; | ||||
| const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = | ||||
|   {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6}; | ||||
|   {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   // TODO(debargha): What are the correct numbers here? | ||||
|   8, 8, 10 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|   }; | ||||
|  | ||||
| typedef enum { | ||||
|   MV_ZERO = 0, | ||||
| @@ -3526,8 +3605,8 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, | ||||
|           pc_tree->partitioning = PARTITION_NONE; | ||||
|  | ||||
|         // Adjust dist breakout threshold according to the partition size. | ||||
|         dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + | ||||
|             b_height_log2_lookup[bsize]); | ||||
|         dist_breakout_thr >>= (2 * (MAX_SB_SIZE_LOG2 - 2)) | ||||
|           - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); | ||||
|  | ||||
|         rate_breakout_thr *= num_pels_log2_lookup[bsize]; | ||||
|  | ||||
| @@ -4124,12 +4203,12 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, | ||||
|  | ||||
|   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && | ||||
|       pc_tree->index != 3) { | ||||
|     int output_enabled = (bsize == BLOCK_64X64); | ||||
|     int output_enabled = (bsize == BLOCK_LARGEST); | ||||
|     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, | ||||
|               bsize, pc_tree); | ||||
|   } | ||||
|  | ||||
|   if (bsize == BLOCK_64X64) { | ||||
|   if (bsize == BLOCK_LARGEST) { | ||||
|     assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip)); | ||||
|     assert(best_rdc.rate < INT_MAX); | ||||
|     assert(best_rdc.dist < INT64_MAX); | ||||
| @@ -4149,6 +4228,11 @@ static void encode_rd_sb_row(VP10_COMP *cpi, | ||||
|   MACROBLOCKD *const xd = &x->e_mbd; | ||||
|   SPEED_FEATURES *const sf = &cpi->sf; | ||||
|   int mi_col; | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   const int leaf_nodes = 256; | ||||
| #else | ||||
|   const int leaf_nodes = 64; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|  | ||||
|   // Initialize the left context for the new SB row | ||||
|   vp10_zero_left_context(xd); | ||||
| @@ -4170,10 +4254,10 @@ static void encode_rd_sb_row(VP10_COMP *cpi, | ||||
|     MODE_INFO **mi = cm->mi_grid_visible + idx_str; | ||||
|  | ||||
|     if (sf->adaptive_pred_interp_filter) { | ||||
|       for (i = 0; i < 64; ++i) | ||||
|       for (i = 0; i < leaf_nodes; ++i) | ||||
|         td->leaf_tree[i].pred_interp_filter = SWITCHABLE; | ||||
|  | ||||
|       for (i = 0; i < 64; ++i) { | ||||
|       for (i = 0; i < leaf_nodes; ++i) { | ||||
|         td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; | ||||
|         td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; | ||||
|         td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; | ||||
| @@ -4187,29 +4271,29 @@ static void encode_rd_sb_row(VP10_COMP *cpi, | ||||
|     if (seg->enabled) { | ||||
|       const uint8_t *const map = seg->update_map ? cpi->segmentation_map | ||||
|                                                  : cm->last_frame_seg_map; | ||||
|       int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); | ||||
|       int segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col); | ||||
|       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); | ||||
|     } | ||||
|  | ||||
|     x->source_variance = UINT_MAX; | ||||
|     if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { | ||||
|       const BLOCK_SIZE bsize = | ||||
|           seg_skip ? BLOCK_64X64 : sf->always_this_block_size; | ||||
|       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); | ||||
|           seg_skip ? BLOCK_LARGEST : sf->always_this_block_size; | ||||
|       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); | ||||
|       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); | ||||
|       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, | ||||
|                        BLOCK_64X64, &dummy_rate, &dummy_dist, | ||||
|                        BLOCK_LARGEST, &dummy_rate, &dummy_dist, | ||||
| #if CONFIG_SUPERTX | ||||
|                        &dummy_rate_nocoef, | ||||
| #endif  // CONFIG_SUPERTX | ||||
|                        1, td->pc_root); | ||||
|     } else if (cpi->partition_search_skippable_frame) { | ||||
|       BLOCK_SIZE bsize; | ||||
|       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); | ||||
|       set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); | ||||
|       bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); | ||||
|       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); | ||||
|       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, | ||||
|                        BLOCK_64X64, &dummy_rate, &dummy_dist, | ||||
|                        BLOCK_LARGEST, &dummy_rate, &dummy_dist, | ||||
| #if CONFIG_SUPERTX | ||||
|                        &dummy_rate_nocoef, | ||||
| #endif  // CONFIG_SUPERTX | ||||
| @@ -4218,7 +4302,7 @@ static void encode_rd_sb_row(VP10_COMP *cpi, | ||||
|                cm->frame_type != KEY_FRAME) { | ||||
|       choose_partitioning(cpi, tile_info, x, mi_row, mi_col); | ||||
|       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, | ||||
|                        BLOCK_64X64, &dummy_rate, &dummy_dist, | ||||
|                        BLOCK_LARGEST, &dummy_rate, &dummy_dist, | ||||
| #if CONFIG_SUPERTX | ||||
|                        &dummy_rate_nocoef, | ||||
| #endif  // CONFIG_SUPERTX | ||||
| @@ -4226,12 +4310,12 @@ static void encode_rd_sb_row(VP10_COMP *cpi, | ||||
|     } else { | ||||
|       // If required set upper and lower partition size limits | ||||
|       if (sf->auto_min_max_partition_size) { | ||||
|         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); | ||||
|         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST); | ||||
|         rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, | ||||
|                                 &x->min_partition_size, | ||||
|                                 &x->max_partition_size); | ||||
|       } | ||||
|       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, | ||||
|       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_LARGEST, | ||||
|                         &dummy_rdc, | ||||
| #if CONFIG_SUPERTX | ||||
|                         &dummy_rate_nocoef, | ||||
| @@ -4930,19 +5014,15 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, | ||||
| #if CONFIG_OBMC | ||||
|     if (mbmi->obmc) { | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|       DECLARE_ALIGNED(16, uint8_t, | ||||
|                       tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, | ||||
|                       tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #else | ||||
|       DECLARE_ALIGNED(16, uint8_t, | ||||
|                       tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, | ||||
|                       tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|       uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; | ||||
|       int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; | ||||
|       int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; | ||||
|       int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|       int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|  | ||||
|       assert(mbmi->sb_type >= BLOCK_8X8); | ||||
|  | ||||
| @@ -4950,21 +5030,19 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, | ||||
|       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|         int len = sizeof(uint16_t); | ||||
|         dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); | ||||
|         dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); | ||||
|         dst_buf1[2] = CONVERT_TO_BYTEPTR( | ||||
|             tmp_buf1 + CU_SIZE * CU_SIZE * 2 * len); | ||||
|         dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); | ||||
|         dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); | ||||
|         dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); | ||||
|         dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); | ||||
|         dst_buf2[2] = CONVERT_TO_BYTEPTR( | ||||
|             tmp_buf2 + CU_SIZE * CU_SIZE * 2 * len); | ||||
|         dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); | ||||
|         dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); | ||||
|       } else { | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|       dst_buf1[0] = tmp_buf1; | ||||
|       dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; | ||||
|       dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; | ||||
|       dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; | ||||
|       dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; | ||||
|       dst_buf2[0] = tmp_buf2; | ||||
|       dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; | ||||
|       dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; | ||||
|       dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; | ||||
|       dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|       } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
| @@ -5447,38 +5525,35 @@ static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, | ||||
|  | ||||
|   int i, ctx; | ||||
|   uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|                   tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|                   tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, | ||||
|                   tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); | ||||
|   int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; | ||||
|   int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; | ||||
|   int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); | ||||
|   int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; | ||||
|   int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; | ||||
|   int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE}; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     int len = sizeof(uint16_t); | ||||
|     dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); | ||||
|     dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len); | ||||
|     dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len); | ||||
|     dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); | ||||
|     dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len); | ||||
|     dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len); | ||||
|     dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); | ||||
|     dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); | ||||
|     dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len); | ||||
|     dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len); | ||||
|   } else { | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     dst_buf1[0] = tmp_buf1; | ||||
|     dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE; | ||||
|     dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE; | ||||
|     dst_buf2[0] = tmp_buf2; | ||||
|     dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE; | ||||
|     dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE; | ||||
|     dst_buf3[0] = tmp_buf3; | ||||
|     dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; | ||||
|     dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE; | ||||
|     dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
| @@ -6037,7 +6112,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, | ||||
|   sse_uv = 0; | ||||
|   for (plane = 1; plane < MAX_MB_PLANE; ++plane) { | ||||
| #if CONFIG_VAR_TX | ||||
|     ENTROPY_CONTEXT ctxa[16], ctxl[16]; | ||||
|     ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; | ||||
|     ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; | ||||
|     const struct macroblockd_plane *const pd = &xd->plane[plane]; | ||||
|     int coeff_ctx = 1; | ||||
|  | ||||
| @@ -6081,7 +6157,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, | ||||
| #endif  // CONFIG_EXT_TX | ||||
|   for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { | ||||
| #if CONFIG_VAR_TX | ||||
|     ENTROPY_CONTEXT ctxa[16], ctxl[16]; | ||||
|     ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; | ||||
|     ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; | ||||
|     const struct macroblockd_plane *const pd = &xd->plane[0]; | ||||
|     int coeff_ctx = 1; | ||||
| #endif  // CONFIG_VAR_TX | ||||
|   | ||||
| @@ -29,8 +29,8 @@ | ||||
| #include "vp10/encoder/tokenize.h" | ||||
|  | ||||
| struct optimize_ctx { | ||||
|   ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; | ||||
|   ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; | ||||
|   ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; | ||||
|   ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MI_BLOCK_SIZE]; | ||||
| }; | ||||
|  | ||||
| void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { | ||||
| @@ -96,9 +96,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, | ||||
|   struct macroblock_plane *const p = &mb->plane[plane]; | ||||
|   struct macroblockd_plane *const pd = &xd->plane[plane]; | ||||
|   const int ref = is_inter_block(&xd->mi[0]->mbmi); | ||||
|   vp10_token_state tokens[1025][2]; | ||||
|   unsigned best_index[1025][2]; | ||||
|   uint8_t token_cache[1024]; | ||||
|   vp10_token_state tokens[MAX_TX_SQUARE+1][2]; | ||||
|   unsigned best_index[MAX_TX_SQUARE+1][2]; | ||||
|   uint8_t token_cache[MAX_TX_SQUARE]; | ||||
|   const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); | ||||
|   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); | ||||
|   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); | ||||
|   | ||||
| @@ -1955,6 +1955,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) { | ||||
|       CHECK_MEM_ERROR(cm, x->palette_buffer, | ||||
|                       vpx_memalign(16, sizeof(*x->palette_buffer))); | ||||
|     } | ||||
|     vp10_free_pc_tree(&cpi->td); | ||||
|     vp10_setup_pc_tree(&cpi->common, &cpi->td); | ||||
|   } | ||||
|  | ||||
|   vp10_reset_segment_features(cm); | ||||
| @@ -3147,7 +3149,7 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { | ||||
|   } | ||||
|  | ||||
|   if (lf->filter_level > 0) { | ||||
| #if CONFIG_VAR_TX | ||||
| #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION | ||||
|     vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); | ||||
| #else | ||||
|     if (cpi->num_workers > 1) | ||||
|   | ||||
| @@ -312,8 +312,8 @@ typedef struct VP10_COMP { | ||||
|   QUANTS quants; | ||||
|   ThreadData td; | ||||
|   MB_MODE_INFO_EXT *mbmi_ext_base; | ||||
|   DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); | ||||
|   DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); | ||||
|   DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);   // 8: SIMD width | ||||
|   DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);  // 8: SIMD width | ||||
|   VP10_COMMON common; | ||||
|   VP10EncoderConfig oxcf; | ||||
|   struct lookahead_ctx    *lookahead; | ||||
|   | ||||
| @@ -366,13 +366,13 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (second_pred != NULL) { | ||||
|     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|       DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); | ||||
|       DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); | ||||
|       vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, | ||||
|                                y_stride); | ||||
|       besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, | ||||
|                         sse1); | ||||
|     } else { | ||||
|       DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); | ||||
|       DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); | ||||
|       vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); | ||||
|       besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); | ||||
|     } | ||||
| @@ -384,7 +384,7 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, | ||||
| #else | ||||
|   (void) xd; | ||||
|   if (second_pred != NULL) { | ||||
|     DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); | ||||
|     vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); | ||||
|     besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); | ||||
|   } else { | ||||
| @@ -694,7 +694,7 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, | ||||
|   unsigned int besterr; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); | ||||
|     DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); | ||||
|     if (second_pred != NULL) | ||||
|       vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y, | ||||
|                                          y_stride); | ||||
| @@ -704,9 +704,9 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, | ||||
|     besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, | ||||
|                       sse); | ||||
|   } else { | ||||
|     DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); | ||||
| #else | ||||
|     DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); | ||||
|     DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); | ||||
|     (void) xd; | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     if (second_pred != NULL) | ||||
| @@ -1961,10 +1961,10 @@ unsigned int vp10_int_pro_motion_estimation(const VP10_COMP *cpi, MACROBLOCK *x, | ||||
|   MACROBLOCKD *xd = &x->e_mbd; | ||||
|   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; | ||||
|   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; | ||||
|   DECLARE_ALIGNED(16, int16_t, hbuf[128]); | ||||
|   DECLARE_ALIGNED(16, int16_t, vbuf[128]); | ||||
|   DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); | ||||
|   DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); | ||||
|   DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]); | ||||
|   DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]); | ||||
|   DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]); | ||||
|   int idx; | ||||
|   const int bw = 4 << b_width_log2_lookup[bsize]; | ||||
|   const int bh = 4 << b_height_log2_lookup[bsize]; | ||||
|   | ||||
| @@ -41,7 +41,7 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, | ||||
|   VP10_COMMON *const cm = &cpi->common; | ||||
|   int64_t filt_err; | ||||
|  | ||||
| #if CONFIG_VAR_TX | ||||
| #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION | ||||
|   vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, | ||||
|                          1, partial_frame); | ||||
| #else | ||||
|   | ||||
| @@ -461,7 +461,7 @@ void vp10_init_quantizer(VP10_COMP *cpi) { | ||||
|       cpi->uv_dequant[q][i] = quant; | ||||
|     } | ||||
|  | ||||
|     for (i = 2; i < 8; i++) { | ||||
|     for (i = 2; i < 8; i++) {  // 8: SIMD width | ||||
|       quants->y_quant[q][i] = quants->y_quant[q][1]; | ||||
|       quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; | ||||
|       quants->y_round_fp[q][i] = quants->y_round_fp[q][1]; | ||||
|   | ||||
| @@ -27,6 +27,7 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr, | ||||
|                                   const scan_order *sc); | ||||
|  | ||||
| typedef struct { | ||||
|   // 0: dc 1: ac 2-7: ac repeated to SIMD width | ||||
|   DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); | ||||
|   DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); | ||||
|   DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); | ||||
|   | ||||
| @@ -62,7 +62,10 @@ void vp10_rd_cost_init(RD_COST *rd_cost) { | ||||
| // This table is used to correct for block size. | ||||
| // The factors here are << 2 (2 = x0.5, 32 = x8 etc). | ||||
| static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { | ||||
|   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 | ||||
|   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, | ||||
| #if CONFIG_EXT_PARTITION | ||||
|   48, 48, 64 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| }; | ||||
|  | ||||
| static void fill_mode_costs(VP10_COMP *cpi) { | ||||
| @@ -560,8 +563,8 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, | ||||
|  | ||||
| void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, | ||||
|                               const struct macroblockd_plane *pd, | ||||
|                               ENTROPY_CONTEXT t_above[16], | ||||
|                               ENTROPY_CONTEXT t_left[16]) { | ||||
|                               ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE], | ||||
|                               ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]) { | ||||
|   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); | ||||
|   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; | ||||
|   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; | ||||
| @@ -935,7 +938,7 @@ void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, | ||||
|     int mode; | ||||
|     for (mode = 0; mode < top_mode; ++mode) { | ||||
|       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4); | ||||
|       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64); | ||||
|       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_LARGEST); | ||||
|       BLOCK_SIZE bs; | ||||
|       for (bs = min_size; bs <= max_size; ++bs) { | ||||
|         int *const fact = &factor_buf[bs][mode]; | ||||
|   | ||||
| @@ -330,8 +330,8 @@ void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame); | ||||
|  | ||||
| void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, | ||||
|                               const struct macroblockd_plane *pd, | ||||
|                               ENTROPY_CONTEXT t_above[16], | ||||
|                               ENTROPY_CONTEXT t_left[16]); | ||||
|                               ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE], | ||||
|                               ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]); | ||||
|  | ||||
| void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi); | ||||
|  | ||||
|   | ||||
| @@ -102,8 +102,8 @@ typedef struct { | ||||
| struct rdcost_block_args { | ||||
|   const VP10_COMP *cpi; | ||||
|   MACROBLOCK *x; | ||||
|   ENTROPY_CONTEXT t_above[16]; | ||||
|   ENTROPY_CONTEXT t_left[16]; | ||||
|   ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE]; | ||||
|   ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]; | ||||
|   int this_rate; | ||||
|   int64_t this_dist; | ||||
|   int64_t this_sse; | ||||
| @@ -376,8 +376,8 @@ static void get_energy_distribution_fine(const VP10_COMP *cpi, | ||||
|   unsigned int esq[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; | ||||
|   unsigned int var[16]; | ||||
|   double total = 0; | ||||
|   const int f_index = bsize - 6; | ||||
|  | ||||
|   const int f_index = bsize - BLOCK_16X16; | ||||
|   if (f_index < 0) { | ||||
|     int i, j, index; | ||||
|     int w_shift = bw == 8 ? 1 : 2; | ||||
| @@ -890,7 +890,7 @@ static int cost_coeffs(MACROBLOCK *x, | ||||
|   const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); | ||||
|   unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = | ||||
|                    x->token_costs[tx_size][type][is_inter_block(mbmi)]; | ||||
|   uint8_t token_cache[32 * 32]; | ||||
|   uint8_t token_cache[MAX_TX_SQUARE]; | ||||
| #if CONFIG_VAR_TX | ||||
|   int pt = coeff_ctx; | ||||
| #else | ||||
| @@ -1045,10 +1045,10 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane, | ||||
|     if (*eob) { | ||||
|       const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|       DECLARE_ALIGNED(16, uint16_t, recon16[32 * 32]);  // MAX TX_SIZE**2 | ||||
|       DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]); | ||||
|       uint8_t *recon = (uint8_t*)recon16; | ||||
| #else | ||||
|       DECLARE_ALIGNED(16, uint8_t, recon[32 * 32]);     // MAX TX_SIZE**2 | ||||
|       DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
|       const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV; | ||||
| @@ -1064,18 +1064,18 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane, | ||||
|       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|         recon = CONVERT_TO_BYTEPTR(recon); | ||||
|         inv_txfm_param.bd = xd->bd; | ||||
|         vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, | ||||
|         vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, | ||||
|                                  NULL, 0, NULL, 0, bs, bs, xd->bd); | ||||
|         highbd_inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param); | ||||
|         highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param); | ||||
|       } else | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|       { | ||||
|         vpx_convolve_copy(dst, dst_stride, recon, 32, | ||||
|         vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, | ||||
|                           NULL, 0, NULL, 0, bs, bs); | ||||
|         inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param); | ||||
|         inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param); | ||||
|       } | ||||
|  | ||||
|       cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, 32, &tmp); | ||||
|       cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp); | ||||
|     } | ||||
|  | ||||
|     *out_dist = (int64_t)tmp * 16; | ||||
| @@ -2838,10 +2838,10 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, | ||||
|   uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col]; | ||||
|   uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]); | ||||
|   uint8_t *rec_buffer; | ||||
| #else | ||||
|   DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; | ||||
|   const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; | ||||
| @@ -2860,16 +2860,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, | ||||
|   // TODO(any): Use dist_block to compute distortion | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16); | ||||
|     vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, | ||||
|     rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16); | ||||
|     vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, | ||||
|                              NULL, 0, NULL, 0, bh, bh, xd->bd); | ||||
|   } else { | ||||
|     rec_buffer = (uint8_t *)rec_buffer_alloc_16; | ||||
|     vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, | ||||
|     rec_buffer = (uint8_t *)rec_buffer16; | ||||
|     vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, | ||||
|                       NULL, 0, NULL, 0, bh, bh); | ||||
|   } | ||||
| #else | ||||
|   vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, | ||||
|   vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, | ||||
|                     NULL, 0, NULL, 0, bh, bh); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
| @@ -2904,12 +2904,12 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|       inv_txfm_param.bd = xd->bd; | ||||
|       highbd_inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); | ||||
|       highbd_inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param); | ||||
|     } else { | ||||
|       inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); | ||||
|       inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param); | ||||
|     } | ||||
| #else  // CONFIG_VP9_HIGHBITDEPTH | ||||
|     inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); | ||||
|     inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
|     if ((bh >> 2) + blk_col > max_blocks_wide || | ||||
| @@ -2921,16 +2921,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, | ||||
|       tmp = 0; | ||||
|       for (idy = 0; idy < blocks_height; idy += 2) { | ||||
|         for (idx = 0; idx < blocks_width; idx += 2) { | ||||
|           cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, | ||||
|                                     src_stride, | ||||
|                                     rec_buffer + 4 * idy * 32 + 4 * idx, | ||||
|                                     32, &this_dist); | ||||
|           uint8_t *const s = src + 4 * idy * src_stride + 4 * idx; | ||||
|           uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx; | ||||
|           cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist); | ||||
|           tmp += this_dist; | ||||
|         } | ||||
|       } | ||||
|     } else { | ||||
|       uint32_t this_dist; | ||||
|       cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &this_dist); | ||||
|       cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE, | ||||
|                                 &this_dist); | ||||
|       tmp = this_dist; | ||||
|     } | ||||
|   } | ||||
| @@ -3125,8 +3125,10 @@ static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     int idx, idy; | ||||
|     int block = 0; | ||||
|     int step = 1 << (max_txsize_lookup[plane_bsize] * 2); | ||||
|     ENTROPY_CONTEXT ctxa[16], ctxl[16]; | ||||
|     TXFM_CONTEXT tx_above[8], tx_left[8]; | ||||
|     ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE]; | ||||
|     ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE]; | ||||
|     TXFM_CONTEXT tx_above[MI_BLOCK_SIZE]; | ||||
|     TXFM_CONTEXT tx_left[MI_BLOCK_SIZE]; | ||||
|  | ||||
|     int pnrate = 0, pnskip = 1; | ||||
|     int64_t pndist = 0, pnsse = 0; | ||||
| @@ -3240,7 +3242,7 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, | ||||
|   const int is_inter = is_inter_block(mbmi); | ||||
|   TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; | ||||
|   TX_SIZE best_tx = TX_SIZES; | ||||
|   uint8_t best_blk_skip[256]; | ||||
|   uint8_t best_blk_skip[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; | ||||
|   const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); | ||||
|   int idx, idy; | ||||
|   int prune = 0; | ||||
| @@ -3423,7 +3425,8 @@ static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     int step = 1 << (max_txsize_lookup[plane_bsize] * 2); | ||||
|     int pnrate = 0, pnskip = 1; | ||||
|     int64_t pndist = 0, pnsse = 0; | ||||
|     ENTROPY_CONTEXT ta[16], tl[16]; | ||||
|     ENTROPY_CONTEXT ta[2 * MI_BLOCK_SIZE]; | ||||
|     ENTROPY_CONTEXT tl[2 * MI_BLOCK_SIZE]; | ||||
|  | ||||
|     vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); | ||||
|  | ||||
| @@ -4560,10 +4563,10 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|  | ||||
|   // Prediction buffer from second frame. | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]); | ||||
|   uint8_t *second_pred; | ||||
| #else | ||||
|   DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
|   for (ref = 0; ref < 2; ++ref) { | ||||
| @@ -5733,9 +5736,9 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     step_param = cpi->mv_step_param; | ||||
|   } | ||||
|  | ||||
|   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { | ||||
|   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) { | ||||
|     int boffset = | ||||
|         2 * (b_width_log2_lookup[BLOCK_64X64] - | ||||
|         2 * (b_width_log2_lookup[BLOCK_LARGEST] - | ||||
|              VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); | ||||
|     step_param = VPXMAX(step_param, boffset); | ||||
|   } | ||||
| @@ -6202,16 +6205,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|   const int * const intra_mode_cost = | ||||
|     cpi->mbmode_cost[size_group_lookup[bsize]]; | ||||
|   const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); | ||||
|   const int tmp_buf_sz = CU_SIZE * CU_SIZE; | ||||
| #if CONFIG_REF_MV | ||||
|   uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); | ||||
| #endif | ||||
| #endif  // CONFIG_EXT_INTER | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|   uint8_t *tmp_buf; | ||||
| #else | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|  | ||||
| #if CONFIG_OBMC | ||||
| @@ -6226,7 +6228,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|   int best_rate_y, best_rate_uv; | ||||
| #endif  // CONFIG_SUPERTX | ||||
| #if CONFIG_VAR_TX | ||||
|   uint8_t best_blk_skip[3][256]; | ||||
|   uint8_t best_blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4]; | ||||
| #endif  // CONFIG_VAR_TX | ||||
|   int64_t best_distortion = INT64_MAX; | ||||
|   unsigned int best_pred_var = UINT_MAX; | ||||
| @@ -6241,8 +6243,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|   int orig_dst_stride[MAX_MB_PLANE]; | ||||
|   int rs = 0; | ||||
|   INTERP_FILTER best_filter = SWITCHABLE; | ||||
|   uint8_t skip_txfm[MAX_MB_PLANE][4] = {{0}}; | ||||
|   int64_t bsse[MAX_MB_PLANE][4] = {{0}}; | ||||
|   uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}}; | ||||
|   int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}}; | ||||
|  | ||||
|   int skip_txfm_sb = 0; | ||||
|   int64_t skip_sse_sb = INT64_MAX; | ||||
| @@ -6569,8 +6571,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|           restore_dst_buf(xd, orig_dst, orig_dst_stride); | ||||
|         } else { | ||||
|           for (j = 0; j < MAX_MB_PLANE; j++) { | ||||
|             xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; | ||||
|             xd->plane[j].dst.stride = 64; | ||||
|             xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE; | ||||
|             xd->plane[j].dst.stride = MAX_SB_SIZE; | ||||
|           } | ||||
|         } | ||||
|         vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); | ||||
| @@ -6648,15 +6650,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     if (have_newmv_in_inter_mode(this_mode)) { | ||||
|       int_mv tmp_mv[2]; | ||||
|       int rate_mvs[2], tmp_rate_mv = 0; | ||||
|       uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; | ||||
|       uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; | ||||
|       uint8_t pred0[2 * MAX_SB_SQUARE * 3]; | ||||
|       uint8_t pred1[2 * MAX_SB_SQUARE * 3]; | ||||
|       uint8_t *preds0[3] = {pred0, | ||||
|                             pred0 + 2 * CU_SIZE * CU_SIZE, | ||||
|                             pred0 + 4 * CU_SIZE * CU_SIZE}; | ||||
|                             pred0 + 2 * MAX_SB_SQUARE, | ||||
|                             pred0 + 4 * MAX_SB_SQUARE}; | ||||
|       uint8_t *preds1[3] = {pred1, | ||||
|                             pred1 + 2 * CU_SIZE * CU_SIZE, | ||||
|                             pred1 + 4 * CU_SIZE * CU_SIZE}; | ||||
|       int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; | ||||
|                             pred1 + 2 * MAX_SB_SQUARE, | ||||
|                             pred1 + 4 * MAX_SB_SQUARE}; | ||||
|       int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|       vp10_build_inter_predictors_for_planes_single_buf( | ||||
|           xd, bsize, mi_row, mi_col, 0, preds0, strides); | ||||
|       vp10_build_inter_predictors_for_planes_single_buf( | ||||
| @@ -6723,15 +6725,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|         mbmi->mv[1].as_int = cur_mv[1].as_int; | ||||
|       } | ||||
|     } else { | ||||
|       uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; | ||||
|       uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; | ||||
|       uint8_t pred0[2 * MAX_SB_SQUARE * 3]; | ||||
|       uint8_t pred1[2 * MAX_SB_SQUARE * 3]; | ||||
|       uint8_t *preds0[3] = {pred0, | ||||
|                             pred0 + 2 * CU_SIZE * CU_SIZE, | ||||
|                             pred0 + 4 * CU_SIZE * CU_SIZE}; | ||||
|                             pred0 + 2 * MAX_SB_SQUARE, | ||||
|                             pred0 + 4 * MAX_SB_SQUARE}; | ||||
|       uint8_t *preds1[3] = {pred1, | ||||
|                             pred1 + 2 * CU_SIZE * CU_SIZE, | ||||
|                             pred1 + 4 * CU_SIZE * CU_SIZE}; | ||||
|       int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; | ||||
|                             pred1 + 2 * MAX_SB_SQUARE, | ||||
|                             pred1 + 4 * MAX_SB_SQUARE}; | ||||
|       int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|       vp10_build_inter_predictors_for_planes_single_buf( | ||||
|           xd, bsize, mi_row, mi_col, 0, preds0, strides); | ||||
|       vp10_build_inter_predictors_for_planes_single_buf( | ||||
| @@ -6791,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     int tmp_rate_mv = 0; | ||||
|     mbmi->ref_frame[1] = NONE; | ||||
|     for (j = 0; j < MAX_MB_PLANE; j++) { | ||||
|       xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz; | ||||
|       xd->plane[j].dst.stride = CU_SIZE; | ||||
|       xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE; | ||||
|       xd->plane[j].dst.stride = MAX_SB_SIZE; | ||||
|     } | ||||
|     vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); | ||||
|     restore_dst_buf(xd, orig_dst, orig_dst_stride); | ||||
| @@ -6805,11 +6807,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|       rmode = intra_mode_cost[mbmi->interintra_mode]; | ||||
|       vp10_build_interintra_predictors(xd, | ||||
|                                        tmp_buf, | ||||
|                                        tmp_buf + tmp_buf_sz, | ||||
|                                        tmp_buf + 2 * tmp_buf_sz, | ||||
|                                        CU_SIZE, | ||||
|                                        CU_SIZE, | ||||
|                                        CU_SIZE, | ||||
|                                        tmp_buf + MAX_SB_SQUARE, | ||||
|                                        tmp_buf + 2 * MAX_SB_SQUARE, | ||||
|                                        MAX_SB_SIZE, | ||||
|                                        MAX_SB_SIZE, | ||||
|                                        MAX_SB_SIZE, | ||||
|                                        bsize); | ||||
|       model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, | ||||
|                       &skip_txfm_sb, &skip_sse_sb); | ||||
| @@ -6830,11 +6832,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     if (wedge_bits) { | ||||
|       vp10_build_interintra_predictors(xd, | ||||
|                                        tmp_buf, | ||||
|                                        tmp_buf + tmp_buf_sz, | ||||
|                                        tmp_buf + 2 * tmp_buf_sz, | ||||
|                                        CU_SIZE, | ||||
|                                        CU_SIZE, | ||||
|                                        CU_SIZE, | ||||
|                                        tmp_buf + MAX_SB_SQUARE, | ||||
|                                        tmp_buf + 2 * MAX_SB_SQUARE, | ||||
|                                        MAX_SB_SIZE, | ||||
|                                        MAX_SB_SIZE, | ||||
|                                        MAX_SB_SIZE, | ||||
|                                        bsize); | ||||
|       model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, | ||||
|                       &skip_txfm_sb, &skip_sse_sb); | ||||
| @@ -6852,11 +6854,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|         mbmi->interintra_uv_wedge_index = wedge_index; | ||||
|         vp10_build_interintra_predictors(xd, | ||||
|                                          tmp_buf, | ||||
|                                          tmp_buf + tmp_buf_sz, | ||||
|                                          tmp_buf + 2 * tmp_buf_sz, | ||||
|                                          CU_SIZE, | ||||
|                                          CU_SIZE, | ||||
|                                          CU_SIZE, | ||||
|                                          tmp_buf + MAX_SB_SQUARE, | ||||
|                                          tmp_buf + 2 * MAX_SB_SQUARE, | ||||
|                                          MAX_SB_SIZE, | ||||
|                                          MAX_SB_SIZE, | ||||
|                                          MAX_SB_SIZE, | ||||
|                                          bsize); | ||||
|         model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, | ||||
|                         &skip_txfm_sb, &skip_sse_sb); | ||||
| @@ -6937,8 +6939,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, | ||||
|     if (best_needs_copy) { | ||||
|       // again temporarily set the buffers to local memory to prevent a memcpy | ||||
|       for (i = 0; i < MAX_MB_PLANE; i++) { | ||||
|         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; | ||||
|         xd->plane[i].dst.stride = 64; | ||||
|         xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE; | ||||
|         xd->plane[i].dst.stride = MAX_SB_SIZE; | ||||
|       } | ||||
|     } | ||||
|     rd = tmp_rd; | ||||
| @@ -7572,33 +7574,33 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, | ||||
|   const MODE_INFO *left_mi = xd->left_mi; | ||||
| #if CONFIG_OBMC | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #else | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|   uint8_t *dst_buf1[3], *dst_buf2[3]; | ||||
|   int dst_stride1[3] = {64, 64, 64}; | ||||
|   int dst_stride2[3] = {64, 64, 64}; | ||||
|   uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; | ||||
|   int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|   int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; | ||||
|  | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { | ||||
|     int len = sizeof(uint16_t); | ||||
|     dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); | ||||
|     dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); | ||||
|     dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); | ||||
|     dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); | ||||
|     dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len); | ||||
|     dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); | ||||
|     dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); | ||||
|     dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); | ||||
|     dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); | ||||
|     dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len); | ||||
|   } else { | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
|   dst_buf1[0] = tmp_buf1; | ||||
|   dst_buf1[1] = tmp_buf1 + 4096; | ||||
|   dst_buf1[2] = tmp_buf1 + 8192; | ||||
|   dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; | ||||
|   dst_buf1[2] = tmp_buf1 + 2 * MAX_SB_SQUARE; | ||||
|   dst_buf2[0] = tmp_buf2; | ||||
|   dst_buf2[1] = tmp_buf2 + 4096; | ||||
|   dst_buf2[2] = tmp_buf2 + 8192; | ||||
|   dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; | ||||
|   dst_buf2[2] = tmp_buf2 + 2 * MAX_SB_SQUARE; | ||||
| #if CONFIG_VP9_HIGHBITDEPTH | ||||
|   } | ||||
| #endif  // CONFIG_VP9_HIGHBITDEPTH | ||||
| @@ -9386,7 +9388,7 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, | ||||
|       int switchable_filter_index; | ||||
|       int_mv *second_ref = comp_pred ? | ||||
|                              &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; | ||||
|       b_mode_info tmp_best_bmodes[16]; | ||||
|       b_mode_info tmp_best_bmodes[16];  // Should this be 4 ? | ||||
|       MB_MODE_INFO tmp_best_mbmode; | ||||
|       BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; | ||||
|       int pred_exists = 0; | ||||
|   | ||||
| @@ -328,13 +328,13 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) { | ||||
|       mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride + | ||||
|                  tile_info.mi_col_start; | ||||
|       for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; | ||||
|            mi_row += 8, mi_ptr += 8 * cm->mi_stride) { | ||||
|            mi_row += MI_BLOCK_SIZE, mi_ptr += MI_BLOCK_SIZE * cm->mi_stride) { | ||||
|         MODE_INFO **mi = mi_ptr; | ||||
|         for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; | ||||
|              mi_col += 8, mi += 8) { | ||||
|              mi_col += MI_BLOCK_SIZE, mi += MI_BLOCK_SIZE) { | ||||
|           count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts, | ||||
|                         temporal_predictor_count, t_unpred_seg_counts, | ||||
|                         mi_row, mi_col, BLOCK_64X64); | ||||
|                         mi_row, mi_col, BLOCK_LARGEST); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   | ||||
| @@ -353,6 +353,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, | ||||
|     sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; | ||||
|     sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; | ||||
|     sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST; | ||||
|     sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST; | ||||
|     sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|     sf->max_intra_bsize = BLOCK_32X32; | ||||
|     sf->allow_skip_recode = 1; | ||||
|   } | ||||
| @@ -372,6 +377,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, | ||||
|     sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; | ||||
|     sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; | ||||
|     sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; | ||||
| #if CONFIG_EXT_PARTITION | ||||
|     sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST_NEW_ZERO; | ||||
|     sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST_NEW_ZERO; | ||||
|     sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEW_ZERO; | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
|     sf->adaptive_rd_thresh = 2; | ||||
|     // This feature is only enabled when partition search is disabled. | ||||
|     sf->reuse_inter_pred_sby = 1; | ||||
| @@ -483,7 +493,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { | ||||
|   sf->use_square_partition_only = 0; | ||||
|   sf->auto_min_max_partition_size = NOT_IN_USE; | ||||
|   sf->rd_auto_partition_min_limit = BLOCK_4X4; | ||||
|   sf->default_max_partition_size = BLOCK_64X64; | ||||
|   sf->default_max_partition_size = BLOCK_LARGEST; | ||||
|   sf->default_min_partition_size = BLOCK_4X4; | ||||
|   sf->adjust_partitioning_from_last_frame = 0; | ||||
|   sf->last_partitioning_redo_frequency = 4; | ||||
| @@ -514,7 +524,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { | ||||
|   sf->schedule_mode_search = 0; | ||||
|   for (i = 0; i < BLOCK_SIZES; ++i) | ||||
|     sf->inter_mode_mask[i] = INTER_ALL; | ||||
|   sf->max_intra_bsize = BLOCK_64X64; | ||||
|   sf->max_intra_bsize = BLOCK_LARGEST; | ||||
|   sf->reuse_inter_pred_sby = 0; | ||||
|   // This setting only takes effect when partition_search_type is set | ||||
|   // to FIXED_PARTITION. | ||||
| @@ -541,6 +551,12 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { | ||||
|   else if (oxcf->mode == GOOD) | ||||
|     set_good_speed_feature(cpi, cm, sf, oxcf->speed); | ||||
|  | ||||
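|   // sf->partition_search_breakout_dist_thr is set assuming max 64x64 | ||||
|   // blocks. If the superblock is bigger, scale the threshold by the ratio of | ||||
|   // block areas (4x for each doubling of the superblock dimension). | ||||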
|   if (MAX_SB_SIZE_LOG2 > 6) { | ||||
|     sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6); | ||||
|   } | ||||
|  | ||||
|   cpi->full_search_sad = vp10_full_search_sad; | ||||
|   cpi->diamond_search_sad = vp10_diamond_search_sad; | ||||
|  | ||||
|   | ||||
| @@ -438,7 +438,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, | ||||
|   MACROBLOCK *const x = &td->mb; | ||||
|   MACROBLOCKD *const xd = &x->e_mbd; | ||||
|   TOKENEXTRA **tp = args->tp; | ||||
|   uint8_t token_cache[32 * 32]; | ||||
|   uint8_t token_cache[MAX_TX_SQUARE]; | ||||
|   struct macroblock_plane *p = &x->plane[plane]; | ||||
|   struct macroblockd_plane *pd = &xd->plane[plane]; | ||||
|   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; | ||||
|   | ||||
| @@ -361,9 +361,7 @@ int vp10_denoiser_filter_sse2(const uint8_t *sig, int sig_stride, | ||||
|                                        avg, avg_stride, | ||||
|                                        increase_denoising, | ||||
|                                        bs, motion_magnitude, 8); | ||||
|   } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 || | ||||
|              bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 || | ||||
|              bs == BLOCK_64X32 || bs == BLOCK_64X64) { | ||||
|   } else if (bs < BLOCK_SIZES) { | ||||
|     return vp10_denoiser_NxM_sse2_big(sig, sig_stride, | ||||
|                                      mc_avg, mc_avg_stride, | ||||
|                                      avg, avg_stride, | ||||
|   | ||||
| @@ -130,20 +130,20 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, | ||||
|   // --Must round-up because block may be located at sub-pixel position. | ||||
|   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | ||||
|   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | ||||
|   uint8_t temp[MAX_EXT_SIZE * MAX_CU_SIZE]; | ||||
|   uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE]; | ||||
|   int intermediate_height = | ||||
|           (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; | ||||
|  | ||||
|   assert(w <= MAX_CU_SIZE); | ||||
|   assert(h <= MAX_CU_SIZE); | ||||
|   assert(w <= MAX_SB_SIZE); | ||||
|   assert(h <= MAX_SB_SIZE); | ||||
|  | ||||
|   assert(y_step_q4 <= 32); | ||||
|   assert(x_step_q4 <= 32); | ||||
|  | ||||
|   convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, | ||||
|                  temp, MAX_CU_SIZE, | ||||
|                  temp, MAX_SB_SIZE, | ||||
|                  x_filters, x0_q4, x_step_q4, w, intermediate_height); | ||||
|   convolve_vert(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE, | ||||
|   convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, | ||||
|                 dst, dst_stride, | ||||
|                 y_filters, y0_q4, y_step_q4, w, h); | ||||
| } | ||||
| @@ -240,13 +240,13 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | ||||
|                          const int16_t *filter_y, int y_step_q4, | ||||
|                          int w, int h) { | ||||
|   /* Fixed size intermediate buffer places limits on parameters. */ | ||||
|   DECLARE_ALIGNED(16, uint8_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]); | ||||
|   assert(w <= MAX_CU_SIZE); | ||||
|   assert(h <= MAX_CU_SIZE); | ||||
|   DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]); | ||||
|   assert(w <= MAX_SB_SIZE); | ||||
|   assert(h <= MAX_SB_SIZE); | ||||
|  | ||||
|   vpx_convolve8_c(src, src_stride, temp, MAX_CU_SIZE, | ||||
|   vpx_convolve8_c(src, src_stride, temp, MAX_SB_SIZE, | ||||
|                   filter_x, x_step_q4, filter_y, y_step_q4, w, h); | ||||
|   vpx_convolve_avg_c(temp, MAX_CU_SIZE, dst, dst_stride, | ||||
|   vpx_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, | ||||
|                      NULL, 0, NULL, 0, w, h); | ||||
| } | ||||
|  | ||||
| @@ -463,21 +463,21 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, | ||||
|   // --Must round-up because block may be located at sub-pixel position. | ||||
|   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | ||||
|   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | ||||
|   uint16_t temp[MAX_EXT_SIZE * MAX_CU_SIZE]; | ||||
|   uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE]; | ||||
|   int intermediate_height = | ||||
|           (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; | ||||
|  | ||||
|   assert(w <= MAX_CU_SIZE); | ||||
|   assert(h <= MAX_CU_SIZE); | ||||
|   assert(w <= MAX_SB_SIZE); | ||||
|   assert(h <= MAX_SB_SIZE); | ||||
|   assert(y_step_q4 <= 32); | ||||
|   assert(x_step_q4 <= 32); | ||||
|  | ||||
|   highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, | ||||
|                         CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, | ||||
|                         CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, | ||||
|                         x_filters, x0_q4, x_step_q4, w, | ||||
|                         intermediate_height, bd); | ||||
|   highbd_convolve_vert( | ||||
|     CONVERT_TO_BYTEPTR(temp) + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE, | ||||
|     CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, | ||||
|     dst, dst_stride, | ||||
|     y_filters, y0_q4, y_step_q4, w, h, bd); | ||||
| } | ||||
| @@ -561,14 +561,14 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | ||||
|                                 const int16_t *filter_y, int y_step_q4, | ||||
|                                 int w, int h, int bd) { | ||||
|   // Fixed size intermediate buffer places limits on parameters. | ||||
|   DECLARE_ALIGNED(16, uint16_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]); | ||||
|   assert(w <= MAX_CU_SIZE); | ||||
|   assert(h <= MAX_CU_SIZE); | ||||
|   DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]); | ||||
|   assert(w <= MAX_SB_SIZE); | ||||
|   assert(h <= MAX_SB_SIZE); | ||||
|  | ||||
|   vpx_highbd_convolve8_c(src, src_stride, | ||||
|                          CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, | ||||
|                          CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, | ||||
|                          filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); | ||||
|   vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, | ||||
|   vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, | ||||
|                             dst, dst_stride, | ||||
|                             NULL, 0, NULL, 0, w, h, bd); | ||||
| } | ||||
|   | ||||
| @@ -13,18 +13,19 @@ | ||||
|  | ||||
| #include "./vpx_config.h" | ||||
| #include "vpx/vpx_integer.h" | ||||
| #include "vpx_dsp/vpx_dsp_common.h" | ||||
| #include "vpx_ports/mem.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #if CONFIG_VP10 && CONFIG_EXT_PARTITION | ||||
| # define MAX_CU_SIZE 128 | ||||
| #else | ||||
| # define MAX_CU_SIZE 64 | ||||
| #endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION | ||||
| #ifndef MAX_SB_SIZE | ||||
| # if CONFIG_VP10 && CONFIG_EXT_PARTITION | ||||
| #   define MAX_SB_SIZE 128 | ||||
| # else | ||||
| #   define MAX_SB_SIZE 64 | ||||
| # endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION | ||||
| #endif  // ndef MAX_SB_SIZE | ||||
|  | ||||
| #define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) | ||||
| #define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) | ||||
|   | ||||
| @@ -99,27 +99,27 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ | ||||
|                               int w, int h) { \ | ||||
|   assert(filter_x[3] != 128); \ | ||||
|   assert(filter_y[3] != 128); \ | ||||
|   assert(w <= MAX_CU_SIZE); \ | ||||
|   assert(h <= MAX_CU_SIZE); \ | ||||
|   assert(w <= MAX_SB_SIZE); \ | ||||
|   assert(h <= MAX_SB_SIZE); \ | ||||
|   assert(x_step_q4 == 16); \ | ||||
|   assert(y_step_q4 == 16); \ | ||||
|   if (filter_x[0] || filter_x[1] || filter_x[2]|| \ | ||||
|       filter_y[0] || filter_y[1] || filter_y[2]) { \ | ||||
|     DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \ | ||||
|     DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \ | ||||
|     vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ | ||||
|                               fdata2, MAX_CU_SIZE, \ | ||||
|                               fdata2, MAX_SB_SIZE, \ | ||||
|                               filter_x, x_step_q4, filter_y, y_step_q4, \ | ||||
|                               w, h + 7); \ | ||||
|     vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \ | ||||
|     vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_SB_SIZE, MAX_SB_SIZE, \ | ||||
|                                     dst, dst_stride, \ | ||||
|                                     filter_x, x_step_q4, filter_y, \ | ||||
|                                     y_step_q4, w, h); \ | ||||
|   } else { \ | ||||
|     DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \ | ||||
|     vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_CU_SIZE, \ | ||||
|     DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \ | ||||
|     vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_SB_SIZE, \ | ||||
|                               filter_x, x_step_q4, filter_y, y_step_q4, \ | ||||
|                               w, h + 1); \ | ||||
|     vpx_convolve8_##avg##vert_##opt(fdata2, MAX_CU_SIZE, dst, dst_stride, \ | ||||
|     vpx_convolve8_##avg##vert_##opt(fdata2, MAX_SB_SIZE, dst, dst_stride, \ | ||||
|                                     filter_x, x_step_q4, filter_y, \ | ||||
|                                     y_step_q4, w, h); \ | ||||
|   } \ | ||||
| @@ -239,38 +239,38 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ | ||||
|                                      const int16_t *filter_x, int x_step_q4, \ | ||||
|                                      const int16_t *filter_y, int y_step_q4, \ | ||||
|                                      int w, int h, int bd) { \ | ||||
|   assert(w <= MAX_CU_SIZE); \ | ||||
|   assert(h <= MAX_CU_SIZE); \ | ||||
|   assert(w <= MAX_SB_SIZE); \ | ||||
|   assert(h <= MAX_SB_SIZE); \ | ||||
|   if (x_step_q4 == 16 && y_step_q4 == 16) { \ | ||||
|     if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ | ||||
|         filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ | ||||
|       DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \ | ||||
|       DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \ | ||||
|       vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, \ | ||||
|                                        src_stride, \ | ||||
|                                        CONVERT_TO_BYTEPTR(fdata2), \ | ||||
|                                        MAX_CU_SIZE, \ | ||||
|                                        MAX_SB_SIZE, \ | ||||
|                                        filter_x, x_step_q4, \ | ||||
|                                        filter_y, y_step_q4, \ | ||||
|                                        w, h + 7, bd); \ | ||||
|       vpx_highbd_convolve8_##avg##vert_##opt( \ | ||||
|         CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_CU_SIZE, \ | ||||
|         MAX_CU_SIZE, \ | ||||
|         CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_SB_SIZE, \ | ||||
|         MAX_SB_SIZE, \ | ||||
|         dst, \ | ||||
|         dst_stride, \ | ||||
|         filter_x, x_step_q4, \ | ||||
|         filter_y, y_step_q4, \ | ||||
|         w, h, bd); \ | ||||
|     } else { \ | ||||
|       DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \ | ||||
|       DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \ | ||||
|       vpx_highbd_convolve8_horiz_##opt(src, \ | ||||
|                                        src_stride, \ | ||||
|                                        CONVERT_TO_BYTEPTR(fdata2), \ | ||||
|                                        MAX_CU_SIZE, \ | ||||
|                                        MAX_SB_SIZE, \ | ||||
|                                        filter_x, x_step_q4, \ | ||||
|                                        filter_y, y_step_q4, \ | ||||
|                                        w, h + 1, bd); \ | ||||
|       vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), \ | ||||
|                                              MAX_CU_SIZE, \ | ||||
|                                              MAX_SB_SIZE, \ | ||||
|                                              dst, \ | ||||
|                                              dst_stride, \ | ||||
|                                              filter_x, x_step_q4, \ | ||||
|   | ||||
| @@ -31,6 +31,10 @@ cglobal subtract_block, 7, 7, 8, \ | ||||
|   je .case_16 | ||||
|   cmp                colsd, 32 | ||||
|   je .case_32 | ||||
| %if CONFIG_EXT_PARTITION | ||||
|   cmp                colsd, 64 | ||||
|   je .case_64 | ||||
| %endif | ||||
|  | ||||
| %macro loop16 6 | ||||
|   mova                  m0, [srcq+%1] | ||||
| @@ -55,6 +59,22 @@ cglobal subtract_block, 7, 7, 8, \ | ||||
|   mova [diffq+mmsize*1+%6], m1 | ||||
| %endmacro | ||||
|  | ||||
| %if CONFIG_EXT_PARTITION | ||||
|   mov             pred_str, pred_stridemp | ||||
| .loop_128: | ||||
|   loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize,  0*mmsize,  2*mmsize | ||||
|   loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize,  4*mmsize,  6*mmsize | ||||
|   loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize,  8*mmsize, 10*mmsize | ||||
|   loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize | ||||
|   lea                diffq, [diffq+diff_strideq*2] | ||||
|   add                predq, pred_str | ||||
|   add                 srcq, src_strideq | ||||
|   sub                rowsd, 1 | ||||
|   jnz .loop_128 | ||||
|   RET | ||||
|  | ||||
| .case_64: | ||||
| %endif | ||||
|   mov             pred_str, pred_stridemp | ||||
| .loop_64: | ||||
|   loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize | ||||
|   | ||||
| @@ -844,12 +844,12 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, | ||||
|   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | ||||
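|   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | ||||
|   // NOTE(review): this bound assumes 64x64 blocks; with 128x128 superblocks | ||||
|   // (CONFIG_EXT_PARTITION) confirm MAX_EXT_SIZE is enlarged to match. | ||||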
|   // --Require an additional 8 rows for the horiz_w8 transpose tail. | ||||
|   DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_CU_SIZE]); | ||||
|   DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_SB_SIZE]); | ||||
|   const int intermediate_height = | ||||
|       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; | ||||
|  | ||||
|   assert(w <= MAX_CU_SIZE); | ||||
|   assert(h <= MAX_CU_SIZE); | ||||
|   assert(w <= MAX_SB_SIZE); | ||||
|   assert(h <= MAX_SB_SIZE); | ||||
|   assert(y_step_q4 <= 32); | ||||
|   assert(x_step_q4 <= 32); | ||||
|  | ||||
| @@ -857,33 +857,33 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, | ||||
|     scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), | ||||
|                             src_stride, | ||||
|                             temp, | ||||
|                             MAX_CU_SIZE, | ||||
|                             MAX_SB_SIZE, | ||||
|                             x_filters, x0_q4, x_step_q4, | ||||
|                             w, intermediate_height); | ||||
|   } else { | ||||
|     scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), | ||||
|                             src_stride, | ||||
|                             temp, | ||||
|                             MAX_CU_SIZE, | ||||
|                             MAX_SB_SIZE, | ||||
|                             x_filters, x0_q4, x_step_q4, | ||||
|                             w, intermediate_height); | ||||
|   } | ||||
|  | ||||
|   if (w >= 16) { | ||||
|     scaledconvolve_vert_w16(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), | ||||
|                             MAX_CU_SIZE, | ||||
|     scaledconvolve_vert_w16(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), | ||||
|                             MAX_SB_SIZE, | ||||
|                             dst, | ||||
|                             dst_stride, | ||||
|                             y_filters, y0_q4, y_step_q4, w, h); | ||||
|   } else if (w == 8) { | ||||
|     scaledconvolve_vert_w8(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), | ||||
|                            MAX_CU_SIZE, | ||||
|     scaledconvolve_vert_w8(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), | ||||
|                            MAX_SB_SIZE, | ||||
|                            dst, | ||||
|                            dst_stride, | ||||
|                            y_filters, y0_q4, y_step_q4, w, h); | ||||
|   } else { | ||||
|     scaledconvolve_vert_w4(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), | ||||
|                            MAX_CU_SIZE, | ||||
|     scaledconvolve_vert_w4(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), | ||||
|                            MAX_SB_SIZE, | ||||
|                            dst, | ||||
|                            dst_stride, | ||||
|                            y_filters, y0_q4, y_step_q4, w, h); | ||||
|   | ||||
| @@ -21,7 +21,11 @@ extern "C" { | ||||
| #include "vpx/vpx_integer.h" | ||||
|  | ||||
| #define VP8BORDERINPIXELS           32 | ||||
| #define VP9INNERBORDERINPIXELS      96 | ||||
| #if CONFIG_EXT_PARTITION | ||||
| # define VP9INNERBORDERINPIXELS     160 | ||||
| #else | ||||
| # define VP9INNERBORDERINPIXELS     96 | ||||
| #endif  // CONFIG_EXT_PARTITION | ||||
| #define VP9_INTERP_EXTEND           4 | ||||
| #define VP9_ENC_BORDER_IN_PIXELS    160 | ||||
| #define VP9_DEC_BORDER_IN_PIXELS    160 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Geza Lore
					Geza Lore