Extend superblock size to 128x128 pixels.

If --enable-ext-partition is used at build time, the superblock size
(sometimes also referred to as coding unit (CU) size) is extended to
128x128 pixels.

Change-Id: Ie09cec6b7e8d765b7555ff5d80974aab60803f3a
This commit is contained in:
Geza Lore
2016-03-07 13:46:39 +00:00
parent cd1d01b96a
commit 552d5cd715
52 changed files with 1448 additions and 824 deletions

View File

@@ -28,7 +28,7 @@
namespace { namespace {
static const unsigned int kMaxDimension = MAX_CU_SIZE; static const unsigned int kMaxDimension = MAX_SB_SIZE;
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride, uint8_t *dst, ptrdiff_t dst_stride,

View File

@@ -50,16 +50,16 @@ class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
TEST_P(MaskedSADTest, OperationCheck) { TEST_P(MaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret; unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = MAX_CU_SIZE; int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_CU_SIZE; int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_CU_SIZE; int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) { for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand8(); src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8(); ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64; msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;
@@ -108,18 +108,18 @@ class HighbdMaskedSADTest : public ::testing::
TEST_P(HighbdMaskedSADTest, OperationCheck) { TEST_P(HighbdMaskedSADTest, OperationCheck) {
unsigned int ref_ret, ret; unsigned int ref_ret, ret;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = MAX_CU_SIZE; int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_CU_SIZE; int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_CU_SIZE; int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) { for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand16()&0xfff; src_ptr[j] = rnd.Rand16()&0xfff;
ref_ptr[j] = rnd.Rand16()&0xfff; ref_ptr[j] = rnd.Rand16()&0xfff;
msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64; msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;

View File

@@ -58,17 +58,17 @@ TEST_P(MaskedVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = MAX_CU_SIZE; int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_CU_SIZE; int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_CU_SIZE; int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) { for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand8(); src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8(); ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65); msk_ptr[j] = rnd(65);
@@ -100,19 +100,19 @@ TEST_P(MaskedVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = MAX_CU_SIZE; int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_CU_SIZE; int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_CU_SIZE; int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE); memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_CU_SIZE*MAX_CU_SIZE); memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE); memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
ref_ret = ref_func_(src_ptr, src_stride, ref_ret = ref_func_(src_ptr, src_stride,
ref_ptr, ref_stride, ref_ptr, ref_stride,
@@ -166,21 +166,21 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = (MAX_CU_SIZE+1); int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1); int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1); int msk_stride = (MAX_SB_SIZE+1);
int xoffset; int xoffset;
int yoffset; int yoffset;
for (int i = 0; i < number_of_iterations; ++i) { for (int i = 0; i < number_of_iterations; ++i) {
int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)}; int xoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)}; int yoffsets[] = {0, 4, rnd(BIL_SUBPEL_SHIFTS)};
for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) { for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) {
src_ptr[j] = rnd.Rand8(); src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8(); ref_ptr[j] = rnd.Rand8();
msk_ptr[j] = rnd(65); msk_ptr[j] = rnd(65);
@@ -221,23 +221,23 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
int first_failure_x = -1; int first_failure_x = -1;
int first_failure_y = -1; int first_failure_y = -1;
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = (MAX_CU_SIZE+1); int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1); int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1); int msk_stride = (MAX_SB_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) { for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) { for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
ref_ret = ref_func_(src_ptr, src_stride, ref_ret = ref_func_(src_ptr, src_stride,
xoffset, yoffset, xoffset, yoffset,
@@ -297,19 +297,19 @@ TEST_P(HighbdMaskedVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = MAX_CU_SIZE; int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_CU_SIZE; int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_CU_SIZE; int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < number_of_iterations; ++i) { for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < MAX_CU_SIZE*MAX_CU_SIZE; j++) { for (int j = 0; j < MAX_SB_SIZE*MAX_SB_SIZE; j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65); msk_ptr[j] = rnd(65);
@@ -341,23 +341,23 @@ TEST_P(HighbdMaskedVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_CU_SIZE*MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE*MAX_SB_SIZE]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = MAX_CU_SIZE; int src_stride = MAX_SB_SIZE;
int ref_stride = MAX_CU_SIZE; int ref_stride = MAX_SB_SIZE;
int msk_stride = MAX_CU_SIZE; int msk_stride = MAX_SB_SIZE;
for (int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
MAX_CU_SIZE*MAX_CU_SIZE); MAX_SB_SIZE*MAX_SB_SIZE);
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
MAX_CU_SIZE*MAX_CU_SIZE); MAX_SB_SIZE*MAX_SB_SIZE);
memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_CU_SIZE*MAX_CU_SIZE); memset(msk_ptr, (i & 0x4) ? 64 : 0, MAX_SB_SIZE*MAX_SB_SIZE);
ref_ret = ref_func_(src8_ptr, src_stride, ref_ret = ref_func_(src8_ptr, src_stride,
ref8_ptr, ref_stride, ref8_ptr, ref_stride,
@@ -407,24 +407,24 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int first_failure_x = -1; int first_failure_x = -1;
int first_failure_y = -1; int first_failure_y = -1;
int src_stride = (MAX_CU_SIZE+1); int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1); int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1); int msk_stride = (MAX_SB_SIZE+1);
int xoffset, yoffset; int xoffset, yoffset;
for (int i = 0; i < number_of_iterations; ++i) { for (int i = 0; i < number_of_iterations; ++i) {
for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) { for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) { for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
for (int j = 0; j < (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1); j++) { for (int j = 0; j < (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1); j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1); ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
msk_ptr[j] = rnd(65); msk_ptr[j] = rnd(65);
@@ -465,27 +465,27 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret; unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse; unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)]); DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE+1)*(MAX_SB_SIZE+1)]);
uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr); uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr); uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
int first_failure_x = -1; int first_failure_x = -1;
int first_failure_y = -1; int first_failure_y = -1;
int err_count = 0; int err_count = 0;
int first_failure = -1; int first_failure = -1;
int src_stride = (MAX_CU_SIZE+1); int src_stride = (MAX_SB_SIZE+1);
int ref_stride = (MAX_CU_SIZE+1); int ref_stride = (MAX_SB_SIZE+1);
int msk_stride = (MAX_CU_SIZE+1); int msk_stride = (MAX_SB_SIZE+1);
for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) { for (int xoffset = 0 ; xoffset < BIL_SUBPEL_SHIFTS ; xoffset++) {
for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) { for (int yoffset = 0 ; yoffset < BIL_SUBPEL_SHIFTS ; yoffset++) {
for (int i = 0; i < 8; ++i) { for (int i = 0; i < 8; ++i) {
vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0, vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0, vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
(MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_CU_SIZE+1)*(MAX_CU_SIZE+1)); memset(msk_ptr, (i & 0x4) ? 64 : 0, (MAX_SB_SIZE+1)*(MAX_SB_SIZE+1));
ref_ret = ref_func_(src8_ptr, src_stride, ref_ret = ref_func_(src8_ptr, src_stride,
xoffset, yoffset, xoffset, yoffset,

View File

@@ -10,13 +10,16 @@
#include "third_party/googletest/src/include/gtest/gtest.h" #include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "./vpx_config.h" #include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h" #include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h" #include "test/acm_random.h"
#include "test/clear_system_state.h" #include "test/clear_system_state.h"
#include "test/register_state_check.h" #include "test/register_state_check.h"
#if CONFIG_VP10
#include "vp10/common/blockd.h"
#elif CONFIG_VP9
#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_blockd.h"
#endif
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
typedef void (*SubtractFunc)(int rows, int cols, typedef void (*SubtractFunc)(int rows, int cols,
@@ -24,7 +27,7 @@ typedef void (*SubtractFunc)(int rows, int cols,
const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *src_ptr, ptrdiff_t src_stride,
const uint8_t *pred_ptr, ptrdiff_t pred_stride); const uint8_t *pred_ptr, ptrdiff_t pred_stride);
namespace vp9 { namespace {
class VP9SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> { class VP9SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> {
public: public:
@@ -105,5 +108,4 @@ INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest,
INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest, INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest,
::testing::Values(vpx_subtract_block_msa)); ::testing::Values(vpx_subtract_block_msa));
#endif #endif
} // namespace
} // namespace vp9

View File

@@ -147,7 +147,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += subtract_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes) ifeq ($(CONFIG_VP9_ENCODER),yes)
LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
@@ -172,6 +172,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc
ifeq ($(CONFIG_EXT_INTER),yes) ifeq ($(CONFIG_EXT_INTER),yes)
LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc

View File

@@ -44,9 +44,6 @@ typedef enum {
#define IsInterpolatingFilter(filter) (1) #define IsInterpolatingFilter(filter) (1)
#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
#define MAXTXLEN 32
#define CU_SIZE 64
static INLINE int is_inter_mode(PREDICTION_MODE mode) { static INLINE int is_inter_mode(PREDICTION_MODE mode) {
#if CONFIG_EXT_INTER #if CONFIG_EXT_INTER
return mode >= NEARESTMV && mode <= NEW_NEWMV; return mode >= NEARESTMV && mode <= NEW_NEWMV;
@@ -167,8 +164,8 @@ typedef struct {
PREDICTION_MODE mode; PREDICTION_MODE mode;
TX_SIZE tx_size; TX_SIZE tx_size;
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
// TODO(jingning): This effectively assigned an entry for each 8x8 block. // TODO(jingning): This effectively assigned a separate entry for each
// Apparently it takes much more space than needed. // 8x8 block. Apparently it takes much more space than needed.
TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
#endif #endif
int8_t skip; int8_t skip;
@@ -318,15 +315,15 @@ typedef struct macroblockd {
const YV12_BUFFER_CONFIG *cur_buf; const YV12_BUFFER_CONFIG *cur_buf;
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MI_BLOCK_SIZE];
PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[8]; PARTITION_CONTEXT left_seg_context[MI_BLOCK_SIZE];
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context; TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT *left_txfm_context; TXFM_CONTEXT *left_txfm_context;
TXFM_CONTEXT left_txfm_context_buffer[8]; TXFM_CONTEXT left_txfm_context_buffer[MI_BLOCK_SIZE];
TX_SIZE max_tx_size; TX_SIZE max_tx_size;
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
@@ -686,6 +683,7 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
#if CONFIG_EXT_INTER #if CONFIG_EXT_INTER
static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) { static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
// TODO(debargha): Should this be bsize < BLOCK_LARGEST?
return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64); return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64);
} }

View File

@@ -19,154 +19,282 @@
extern "C" { extern "C" {
#endif #endif
#if CONFIG_EXT_PARTITION
# define IF_EXT_PARTITION(...) __VA_ARGS__
#else
# define IF_EXT_PARTITION(...)
#endif
// Log 2 conversion lookup tables for block width and height // Log 2 conversion lookup tables for block width and height
static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = static const uint8_t b_width_log2_lookup[BLOCK_SIZES] =
{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)};
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = static const uint8_t b_height_log2_lookup[BLOCK_SIZES] =
{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)};
static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16};
static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] =
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16};
// Log 2 conversion lookup tables for modeinfo width and height // Log 2 conversion lookup tables for modeinfo width and height
static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] =
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)};
static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] =
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3}; {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)};
// Width/height lookup tables in units of various block sizes
static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)};
static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] =
{1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)};
static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)};
static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)};
static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)};
static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)};
// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) // VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
static const uint8_t size_group_lookup[BLOCK_SIZES] = static const uint8_t size_group_lookup[BLOCK_SIZES] =
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)};
static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] =
{4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)};
static const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { static const PARTITION_TYPE
{ // 4X4 partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = {
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 { // 4X4 ->
PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, // 4X4
PARTITION_NONE,
// 4X8, 8X4, 8X8
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 8X16, 16X8, 16X16
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 16X32, 32X16, 32X32
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID // 32X64, 64X32, 64X64
}, { // 8X8
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64
PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID #endif // CONFIG_EXT_PARTITION
}, { // 16X16 }, { // 8X8 ->
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 // 4X4
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, // 4X8, 8X4, 8X8
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 8X16, 16X8, 16X16
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_INVALID, PARTITION_INVALID // 16X32, 32X16, 32X32
}, { // 32X32 PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 // 32X64, 64X32, 64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, #if CONFIG_EXT_PARTITION
PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, // 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
}, { // 64X64 #endif // CONFIG_EXT_PARTITION
// 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 }, { // 16X16 ->
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, // 4X4
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, // 4X8, 8X4, 8X8
PARTITION_NONE PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 16X32, 32X16, 32X32
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
// 32X64, 64X32, 64X64
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#endif // CONFIG_EXT_PARTITION
}, { // 32X32 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 16X32, 32X16, 32X32
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
// 32X64, 64X32, 64X64
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
#endif // CONFIG_EXT_PARTITION
}, { // 64X64 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 16X32, 32X16, 32X32
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 32X64, 64X32, 64X64
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
}, { // 128x128 ->
// 4X4
PARTITION_SPLIT,
// 4X8, 8X4, 8X8
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 8X16, 16X8, 16X16
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 16X32, 32X16, 32X32
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 32X64, 64X32, 64X64
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
// 64x128, 128x64, 128x128
PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
#endif // CONFIG_EXT_PARTITION
} }
}; };
#if CONFIG_EXT_PARTITION_TYPES #if CONFIG_EXT_PARTITION_TYPES
static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] = { static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] =
{ // PARTITION_NONE
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
BLOCK_64X64,
}, { // PARTITION_HORZ
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
}, { // PARTITION_VERT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
}, { // PARTITION_SPLIT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X32,
}, { // PARTITION_HORZ_A
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
}, { // PARTITION_HORZ_B
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_64X32,
}, { // PARTITION_VERT_A
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
}, { // PARTITION_VERT_B
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_32X64,
}
};
#else #else
static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] =
#endif // CONFIG_EXT_PARTITION_TYPES
{
{ // PARTITION_NONE { // PARTITION_NONE
BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, // 4X4
BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_4X4,
BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, // 4X8, 8X4, 8X8
BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
BLOCK_64X64, // 8X16, 16X8, 16X16
BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
// 16X32, 32X16, 32X32
BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
// 32X64, 64X32, 64X64
BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_64X128, BLOCK_128X64, BLOCK_128X128,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ }, { // PARTITION_HORZ
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 4X4
BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, // 4X8, 8X4, 8X8
BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
BLOCK_64X32, // 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT }, { // PARTITION_VERT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 4X4
BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, // 4X8, 8X4, 8X8
BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
BLOCK_32X64, // 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_SPLIT }, { // PARTITION_SPLIT
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 4X4
BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, // 4X8, 8X4, 8X8
BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
BLOCK_32X32, // 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
#endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
}, { // PARTITION_HORZ_A
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ_B
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_A
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_B
// 4X4
BLOCK_INVALID,
// 4X8, 8X4, 8X8
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
// 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
// 32X64, 64X32, 64X64
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_EXT_PARTITION_TYPES
} }
}; };
#endif // CONFIG_EXT_PARTITION_TYPES
static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
TX_4X4, TX_4X4, TX_4X4, // 4X4
TX_8X8, TX_8X8, TX_8X8, TX_4X4,
TX_16X16, TX_16X16, TX_16X16, // 4X8, 8X4, 8X8
TX_32X32, TX_32X32, TX_32X32, TX_32X32 TX_4X4, TX_4X4, TX_8X8,
// 8X16, 16X8, 16X16
TX_8X8, TX_8X8, TX_16X16,
// 16X32, 32X16, 32X32
TX_16X16, TX_16X16, TX_32X32,
// 32X64, 64X32, 64X64
TX_32X32, TX_32X32, TX_32X32,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
TX_32X32, TX_32X32, TX_32X32,
#endif // CONFIG_EXT_PARTITION
}; };
static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { static const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = {
@@ -200,6 +328,11 @@ static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
{{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}},
{{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}},
{{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}},
#if CONFIG_EXT_PARTITION
{{BLOCK_64X128, BLOCK_64X64}, {BLOCK_INVALID, BLOCK_32X64}},
{{BLOCK_128X64, BLOCK_INVALID}, {BLOCK_64X64, BLOCK_64X32}},
{{BLOCK_128X128, BLOCK_128X64}, {BLOCK_64X128, BLOCK_64X64}},
#endif // CONFIG_EXT_PARTITION
}; };
// Generates 4 bit field in which each bit set to 1 represents // Generates 4 bit field in which each bit set to 1 represents
@@ -209,6 +342,24 @@ static const struct {
PARTITION_CONTEXT above; PARTITION_CONTEXT above;
PARTITION_CONTEXT left; PARTITION_CONTEXT left;
} partition_context_lookup[BLOCK_SIZES]= { } partition_context_lookup[BLOCK_SIZES]= {
#if CONFIG_EXT_PARTITION
{31, 31}, // 4X4 - {0b11111, 0b11111}
{31, 30}, // 4X8 - {0b11111, 0b11110}
{30, 31}, // 8X4 - {0b11110, 0b11111}
{30, 30}, // 8X8 - {0b11110, 0b11110}
{30, 28}, // 8X16 - {0b11110, 0b11100}
{28, 30}, // 16X8 - {0b11100, 0b11110}
{28, 28}, // 16X16 - {0b11100, 0b11100}
{28, 24}, // 16X32 - {0b11100, 0b11000}
{24, 28}, // 32X16 - {0b11000, 0b11100}
{24, 24}, // 32X32 - {0b11000, 0b11000}
{24, 16}, // 32X64 - {0b11000, 0b10000}
{16, 24}, // 64X32 - {0b10000, 0b11000}
{16, 16}, // 64X64 - {0b10000, 0b10000}
{16, 0 }, // 64X128- {0b10000, 0b00000}
{0, 16}, // 128X64- {0b00000, 0b10000}
{0, 0 }, // 128X128-{0b00000, 0b00000}
#else
{15, 15}, // 4X4 - {0b1111, 0b1111} {15, 15}, // 4X4 - {0b1111, 0b1111}
{15, 14}, // 4X8 - {0b1111, 0b1110} {15, 14}, // 4X8 - {0b1111, 0b1110}
{14, 15}, // 8X4 - {0b1110, 0b1111} {14, 15}, // 8X4 - {0b1110, 0b1111}
@@ -222,6 +373,7 @@ static const struct {
{8, 0 }, // 32X64 - {0b1000, 0b0000} {8, 0 }, // 32X64 - {0b1000, 0b0000}
{0, 8 }, // 64X32 - {0b0000, 0b1000} {0, 8 }, // 64X32 - {0b0000, 0b1000}
{0, 0 }, // 64X64 - {0b0000, 0b0000} {0, 0 }, // 64X64 - {0b0000, 0b0000}
#endif // CONFIG_EXT_PARTITION
}; };
#if CONFIG_SUPERTX #if CONFIG_SUPERTX

View File

@@ -171,6 +171,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
{ 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
{ 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
{ 10, 7, 6, 128, 128, 128, 128 }, // a/l both split { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
#if CONFIG_EXT_PARTITION
// 128x128 -> 64x64
{ 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split
{ 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
{ 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
{ 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
#endif // CONFIG_EXT_PARTITION
}; };
#else #else
static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
@@ -195,6 +202,13 @@ static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
{ 72, 16, 44 }, // a split, l not split { 72, 16, 44 }, // a split, l not split
{ 58, 32, 12 }, // l split, a not split { 58, 32, 12 }, // l split, a not split
{ 10, 7, 6 }, // a/l both split { 10, 7, 6 }, // a/l both split
#if CONFIG_EXT_PARTITION
// 128x128 -> 64x64
{ 222, 34, 30 }, // a/l both not split
{ 72, 16, 44 }, // a split, l not split
{ 58, 32, 12 }, // l split, a not split
{ 10, 7, 6 }, // a/l both split
#endif // CONFIG_EXT_PARTITION
}; };
#endif // CONFIG_EXT_PARTITION_TYPES #endif // CONFIG_EXT_PARTITION_TYPES
@@ -256,20 +270,33 @@ static const vpx_prob default_inter_compound_mode_probs
static const vpx_prob default_interintra_prob[BLOCK_SIZES] = { static const vpx_prob default_interintra_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
#if CONFIG_EXT_PARTITION
192, 192, 192
#endif // CONFIG_EXT_PARTITION
}; };
static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = { static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
#if CONFIG_EXT_PARTITION
192, 192, 192
#endif // CONFIG_EXT_PARTITION
}; };
static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = { static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
#if CONFIG_EXT_PARTITION
192, 192, 192
#endif // CONFIG_EXT_PARTITION
}; };
#endif // CONFIG_EXT_INTER #endif // CONFIG_EXT_INTER
#if CONFIG_OBMC #if CONFIG_OBMC
static const vpx_prob default_obmc_prob[BLOCK_SIZES] = { static const vpx_prob default_obmc_prob[BLOCK_SIZES] = {
255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244, 255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244,
#if CONFIG_EXT_PARTITION
// TODO(debargha) What are the correct values for these?
192, 192, 192
#endif // CONFIG_EXT_PARTITION
}; };
#endif // CONFIG_OBMC #endif // CONFIG_OBMC
@@ -389,6 +416,11 @@ vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
{ 180, 113, 136, 49, 45, 114}, { 180, 113, 136, 49, 45, 114},
{ 107, 70, 87, 49, 154, 156}, { 107, 70, 87, 49, 154, 156},
{ 98, 105, 142, 63, 64, 152}, { 98, 105, 142, 63, 64, 152},
#if CONFIG_EXT_PARTITION
{ 98, 105, 142, 63, 64, 152},
{ 98, 105, 142, 63, 64, 152},
{ 98, 105, 142, 63, 64, 152},
#endif // CONFIG_EXT_PARTITION
}; };
const vpx_prob const vpx_prob
@@ -403,6 +435,11 @@ vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
{ 67, 53, 54, 55, 66, 93}, { 67, 53, 54, 55, 66, 93},
{ 120, 130, 83, 171, 75, 214}, { 120, 130, 83, 171, 75, 214},
{ 72, 55, 66, 68, 79, 107}, { 72, 55, 66, 68, 79, 107},
#if CONFIG_EXT_PARTITION
{ 72, 55, 66, 68, 79, 107},
{ 72, 55, 66, 68, 79, 107},
{ 72, 55, 66, 68, 79, 107},
#endif // CONFIG_EXT_PARTITION
}; };
const vpx_prob const vpx_prob
@@ -418,6 +455,11 @@ vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
{ 240, 180, 100, }, { 240, 180, 100, },
{ 240, 180, 100, }, { 240, 180, 100, },
{ 240, 180, 100, }, { 240, 180, 100, },
#if CONFIG_EXT_PARTITION
{ 240, 180, 100, },
{ 240, 180, 100, },
{ 240, 180, 100, },
#endif // CONFIG_EXT_PARTITION
}; };

View File

@@ -32,7 +32,7 @@ extern "C" {
#define PALETTE_COLOR_CONTEXTS 16 #define PALETTE_COLOR_CONTEXTS 16
#define PALETTE_MAX_SIZE 8 #define PALETTE_MAX_SIZE 8
#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1) #define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1)
#define PALETTE_Y_MODE_CONTEXTS 3 #define PALETTE_Y_MODE_CONTEXTS 3
struct VP10Common; struct VP10Common;

View File

@@ -18,13 +18,25 @@
extern "C" { extern "C" {
#endif #endif
#define MI_SIZE_LOG2 3 #undef MAX_SB_SIZE
#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6
#if CONFIG_EXT_PARTITION
# define MAX_SB_SIZE_LOG2 7
#else
# define MAX_SB_SIZE_LOG2 6
#endif // CONFIG_EXT_PARTITION
#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
#define MI_SIZE_LOG2 3
#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit #define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit
#define MI_BLOCK_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block #define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block
#define MI_MASK (MI_BLOCK_SIZE - 1) #define MI_MASK (MI_BLOCK_SIZE - 1)
#define MI_MASK_2 (MI_BLOCK_SIZE * 2 - 1)
#if CONFIG_EXT_TILE #if CONFIG_EXT_TILE
# define MAX_TILE_ROWS 1024 # define MAX_TILE_ROWS 1024
@@ -62,19 +74,16 @@ typedef enum BITSTREAM_PROFILE {
#define BLOCK_32X64 10 #define BLOCK_32X64 10
#define BLOCK_64X32 11 #define BLOCK_64X32 11
#define BLOCK_64X64 12 #define BLOCK_64X64 12
#if !CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION # define BLOCK_SIZES 13
#define BLOCK_64X128 13
#define BLOCK_128X64 14
#define BLOCK_128X128 15
#define BLOCK_SIZES 16
#else #else
#define BLOCK_SIZES 13 # define BLOCK_64X128 13
#endif // CONFIG_EXT_PARTITION # define BLOCK_128X64 14
# define BLOCK_128X128 15
#define BLOCK_INVALID (BLOCK_SIZES) # define BLOCK_SIZES 16
#endif // !CONFIG_EXT_PARTITION
#define BLOCK_INVALID BLOCK_SIZES
#define BLOCK_LARGEST (BLOCK_SIZES - 1) #define BLOCK_LARGEST (BLOCK_SIZES - 1)
typedef uint8_t BLOCK_SIZE; typedef uint8_t BLOCK_SIZE;
#if CONFIG_EXT_PARTITION_TYPES #if CONFIG_EXT_PARTITION_TYPES
@@ -104,7 +113,11 @@ typedef enum PARTITION_TYPE {
typedef char PARTITION_CONTEXT; typedef char PARTITION_CONTEXT;
#define PARTITION_PLOFFSET 4 // number of probability models per block size #define PARTITION_PLOFFSET 4 // number of probability models per block size
#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) #if CONFIG_EXT_PARTITION
# define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET)
#else
# define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
#endif // CONFIG_EXT_PARTITION
// block transform size // block transform size
typedef uint8_t TX_SIZE; typedef uint8_t TX_SIZE;
@@ -114,6 +127,15 @@ typedef uint8_t TX_SIZE;
#define TX_32X32 ((TX_SIZE)3) // 32x32 transform #define TX_32X32 ((TX_SIZE)3) // 32x32 transform
#define TX_SIZES ((TX_SIZE)4) #define TX_SIZES ((TX_SIZE)4)
#define MAX_TX_SIZE_LOG2 5
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)
// Number of maxium size transform blocks in the maximum size superblock
#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 \
((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
// frame transform mode // frame transform mode
typedef enum { typedef enum {
ONLY_4X4 = 0, // only 4x4 transform used ONLY_4X4 = 0, // only 4x4 transform used
@@ -286,10 +308,15 @@ typedef enum {
/* Segment Feature Masks */ /* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2 #define MAX_MV_REF_CANDIDATES 2
#if CONFIG_REF_MV #if CONFIG_REF_MV
#define MAX_REF_MV_STACK_SIZE 16 #define MAX_REF_MV_STACK_SIZE 16
#if CONFIG_EXT_PARTITION
#define REF_CAT_LEVEL 640
#else
#define REF_CAT_LEVEL 160 #define REF_CAT_LEVEL 160
#endif #endif // CONFIG_EXT_PARTITION
#endif // CONFIG_REF_MV
#define INTRA_INTER_CONTEXTS 4 #define INTRA_INTER_CONTEXTS 4
#define COMP_INTER_CONTEXTS 5 #define COMP_INTER_CONTEXTS 5

View File

@@ -871,6 +871,9 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
cm->mi_rows - mi_row : MI_BLOCK_SIZE); cm->mi_rows - mi_row : MI_BLOCK_SIZE);
const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
cm->mi_cols - mi_col : MI_BLOCK_SIZE); cm->mi_cols - mi_col : MI_BLOCK_SIZE);
#if CONFIG_EXT_PARTITION
assert(0 && "Not yet updated");
#endif // CONFIG_EXT_PARTITION
vp10_zero(*lfm); vp10_zero(*lfm);
assert(mip[0] != NULL); assert(mip[0] != NULL);
@@ -1045,8 +1048,10 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
const uint64_t rows = cm->mi_rows - mi_row; const uint64_t rows = cm->mi_rows - mi_row;
// Each pixel inside the border gets a 1, // Each pixel inside the border gets a 1,
const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); const uint64_t mask_y =
const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); (((uint64_t) 1 << (rows << MI_BLOCK_SIZE_LOG2)) - 1);
const uint16_t mask_uv =
(((uint16_t) 1 << (((rows + 1) >> 1) << (MI_BLOCK_SIZE_LOG2 - 1))) - 1);
// Remove values completely outside our border. // Remove values completely outside our border.
for (i = 0; i < TX_32X32; i++) { for (i = 0; i < TX_32X32; i++) {
@@ -1262,7 +1267,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
int tx_size_mask = 0; int tx_size_mask = 0;
// Filter level can vary per MI // Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] = if (!(lfl[(r << MI_BLOCK_SIZE_LOG2) + (c >> ss_x)] =
get_filter_level(&cm->lf_info, mbmi))) get_filter_level(&cm->lf_info, mbmi)))
continue; continue;
@@ -1280,11 +1285,13 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
sb_type, ss_x, ss_y) : sb_type, ss_x, ss_y) :
mbmi->inter_tx_size[blk_row][blk_col]; mbmi->inter_tx_size[blk_row][blk_col];
tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]); tx_size_r = VPXMIN(tx_size,
tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]); cm->above_txfm_context[mi_col + c]);
tx_size_c = VPXMIN(tx_size,
cm->left_txfm_context[(mi_row + r) & MI_MASK]);
cm->above_txfm_context[mi_col + c] = tx_size; cm->above_txfm_context[mi_col + c] = tx_size;
cm->left_txfm_context[(mi_row + r) & 0x07] = tx_size; cm->left_txfm_context[(mi_row + r) & MI_MASK] = tx_size;
#endif #endif
// Build masks based on the transform size of each block // Build masks based on the transform size of each block
@@ -1351,13 +1358,14 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
border_mask = ~(mi_col == 0); border_mask = ~(mi_col == 0);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), highbd_filter_selectively_vert(
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, dst->stride,
mask_16x16_c & border_mask, mask_16x16_c & border_mask,
mask_8x8_c & border_mask, mask_8x8_c & border_mask,
mask_4x4_c & border_mask, mask_4x4_c & border_mask,
mask_4x4_int[r], mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3], &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2],
(int)cm->bit_depth); (int)cm->bit_depth);
} else { } else {
filter_selectively_vert(dst->buf, dst->stride, filter_selectively_vert(dst->buf, dst->stride,
@@ -1365,7 +1373,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_c & border_mask, mask_8x8_c & border_mask,
mask_4x4_c & border_mask, mask_4x4_c & border_mask,
mask_4x4_int[r], mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]); &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
} }
#else #else
filter_selectively_vert(dst->buf, dst->stride, filter_selectively_vert(dst->buf, dst->stride,
@@ -1373,7 +1381,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_c & border_mask, mask_8x8_c & border_mask,
mask_4x4_c & border_mask, mask_4x4_c & border_mask,
mask_4x4_int[r], mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]); &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride; dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride; mi_8x8 += row_step_stride;
@@ -1400,13 +1408,14 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
} }
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, dst->stride,
mask_16x16_r, mask_16x16_r,
mask_8x8_r, mask_8x8_r,
mask_4x4_r, mask_4x4_r,
mask_4x4_int_r, mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3], &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2],
(int)cm->bit_depth); (int)cm->bit_depth);
} else { } else {
filter_selectively_horiz(dst->buf, dst->stride, filter_selectively_horiz(dst->buf, dst->stride,
@@ -1414,7 +1423,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_r, mask_8x8_r,
mask_4x4_r, mask_4x4_r,
mask_4x4_int_r, mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]); &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
} }
#else #else
filter_selectively_horiz(dst->buf, dst->stride, filter_selectively_horiz(dst->buf, dst->stride,
@@ -1422,7 +1431,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_8x8_r, mask_8x8_r,
mask_4x4_r, mask_4x4_r,
mask_4x4_int_r, mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]); &cm->lf_info, &lfl[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride; dst->buf += 8 * dst->stride;
} }
@@ -1455,16 +1464,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
highbd_filter_selectively_vert_row2( highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << 3], (int)cm->bit_depth); &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2], (int)cm->bit_depth);
} else { } else {
filter_selectively_vert_row2( filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
} }
#else #else
filter_selectively_vert_row2( filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride; dst->buf += 16 * dst->stride;
mask_16x16 >>= 16; mask_16x16 >>= 16;
@@ -1499,17 +1510,18 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz( highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2],
(int)cm->bit_depth); (int)cm->bit_depth);
} else { } else {
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]); &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
} }
#else #else
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]); &lfm->lfl_y[r << MI_BLOCK_SIZE_LOG2]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride; dst->buf += 8 * dst->stride;
@@ -1539,8 +1551,10 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
if (plane->plane_type == 1) { if (plane->plane_type == 1) {
for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; lfm->lfl_uv[(r << 1) + c] =
lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; lfm->lfl_y[(r << MI_BLOCK_SIZE_LOG2) + (c << 1)];
lfm->lfl_uv[((r + 2) << 1) + c] =
lfm->lfl_y[((r + 2) << MI_BLOCK_SIZE_LOG2) + (c << 1)];
} }
} }
@@ -1632,9 +1646,31 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
VP10_COMMON *cm, VP10_COMMON *cm,
struct macroblockd_plane planes[MAX_MB_PLANE], struct macroblockd_plane planes[MAX_MB_PLANE],
int start, int stop, int y_only) { int start, int stop, int y_only) {
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
# if CONFIG_VAR_TX
memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
# endif // CONFIG_VAR_TX
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
# if CONFIG_VAR_TX
memset(cm->left_txfm_context, TX_SIZES, MI_BLOCK_SIZE);
# endif // CONFIG_VAR_TX
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
for (plane = 0; plane < num_planes; ++plane)
vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
}
}
#else
const int num_planes = y_only ? 1 : MAX_MB_PLANE; const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col; int mi_row, mi_col;
#if !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES
enum lf_path path; enum lf_path path;
LOOP_FILTER_MASK lfm; LOOP_FILTER_MASK lfm;
@@ -1646,29 +1682,17 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
path = LF_PATH_444; path = LF_PATH_444;
else else
path = LF_PATH_SLOW; path = LF_PATH_SLOW;
#endif // !CONFIG_VAR_TX && !CONFIG_EXT_PARTITION_TYPES
#if CONFIG_VAR_TX
memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
#endif
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
#if CONFIG_VAR_TX
memset(cm->left_txfm_context, TX_SIZES, 8);
#endif
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane; int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES
for (plane = 0; plane < num_planes; ++plane)
vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
#else
// TODO(JBB): Make setup_mask work for non 420. // TODO(JBB): Make setup_mask work for non 420.
vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
&lfm);
vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) { for (plane = 1; plane < num_planes; ++plane) {
switch (path) { switch (path) {
@@ -1684,9 +1708,9 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
break; break;
} }
} }
#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION_TYPES
} }
} }
#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
} }
void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame,

View File

@@ -84,8 +84,8 @@ typedef struct {
uint16_t above_uv[TX_SIZES]; uint16_t above_uv[TX_SIZES];
uint16_t left_int_4x4_uv; uint16_t left_int_4x4_uv;
uint16_t above_int_4x4_uv; uint16_t above_int_4x4_uv;
uint8_t lfl_y[64]; uint8_t lfl_y[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
uint8_t lfl_uv[16]; uint8_t lfl_uv[MI_BLOCK_SIZE / 2 * MI_BLOCK_SIZE / 2];
} LOOP_FILTER_MASK; } LOOP_FILTER_MASK;
/* assorted loopfilter functions which get used elsewhere */ /* assorted loopfilter functions which get used elsewhere */

View File

@@ -12,6 +12,7 @@
#include "vp10/common/mvref_common.h" #include "vp10/common/mvref_common.h"
#if CONFIG_REF_MV #if CONFIG_REF_MV
static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi,
const MB_MODE_INFO *const candidate, const MB_MODE_INFO *const candidate,
const MV_REFERENCE_FRAME rf[2], const MV_REFERENCE_FRAME rf[2],
@@ -23,6 +24,8 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi,
int index = 0, ref; int index = 0, ref;
int newmv_count = 0; int newmv_count = 0;
assert(2 * weight < REF_CAT_LEVEL);
if (rf[1] == NONE) { if (rf[1] == NONE) {
// single reference frame // single reference frame
for (ref = 0; ref < 2; ++ref) { for (ref = 0; ref < 2; ++ref) {
@@ -246,32 +249,30 @@ static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
return newmv_count; return newmv_count;
} }
// This function assumes MI blocks are 8x8 and coding units are 64x64
static int has_top_right(const MACROBLOCKD *xd, static int has_top_right(const MACROBLOCKD *xd,
int mi_row, int mi_col, int bs) { int mi_row, int mi_col, int bs) {
// In a split partition all apart from the bottom right has a top right // In a split partition all apart from the bottom right has a top right
int has_tr = !((mi_row & bs) & (bs * 2 - 1)) || int has_tr = !((mi_row & bs) && (mi_col & bs));
!((mi_col & bs) & (bs * 2 - 1));
// bs > 0 and bs is a power of 2
assert(bs > 0 && !(bs & (bs - 1)));
// Filter out partial right-most boundaries
// For each 4x4 group of blocks, when the bottom right is decoded the blocks // For each 4x4 group of blocks, when the bottom right is decoded the blocks
// to the right have not been decoded therefore the second from bottom in the // to the right have not been decoded therefore the bottom right does
// right-most column does not have a top right // not have a top right
if ((mi_col & bs) & (bs * 2 - 1)) { while (bs < MI_BLOCK_SIZE) {
if (((mi_col & (2 * bs)) & (bs * 4 - 1)) && if (mi_col & bs) {
((mi_row & (2 * bs)) & (bs * 4 - 1))) if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) {
has_tr = 0; has_tr = 0;
break;
}
} else {
break;
}
bs <<= 1;
} }
// If the right had side of the block lines up with the right had edge end of // The left hand of two vertical rectangles always has a top right (as the
// a group of 8x8 MI blocks (i.e. edge of a coding unit) and is not on the top
// row of that coding unit, it does not have a top right
if (has_tr)
if (((mi_col + xd->n8_w) & 0x07) == 0)
if ((mi_row & 0x07) > 0)
has_tr = 0;
// The left had of two vertical rectangles always has a top right (as the
// block above will have been decoded) // block above will have been decoded)
if (xd->n8_w < xd->n8_h) if (xd->n8_w < xd->n8_h)
if (!xd->is_sec_rect) if (!xd->is_sec_rect)
@@ -359,8 +360,11 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd,
nearest_refmv_count = *refmv_count; nearest_refmv_count = *refmv_count;
for (idx = 0; idx < nearest_refmv_count; ++idx) for (idx = 0; idx < nearest_refmv_count; ++idx) {
assert(ref_mv_stack[idx].weight > 0 &&
ref_mv_stack[idx].weight < REF_CAT_LEVEL);
ref_mv_stack[idx].weight += REF_CAT_LEVEL; ref_mv_stack[idx].weight += REF_CAT_LEVEL;
}
if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame
&& rf[1] == NONE) { && rf[1] == NONE) {

View File

@@ -120,7 +120,16 @@ static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
// 64X32 // 64X32
{{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
// 64X64 // 64X64
{{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}},
#if CONFIG_EXT_PARTITION
// TODO(debargha/jingning) Making them twice the 32x64, .. ones above
// 64x128
{{0, -2}, {-2, 0}, {8, -2}, {-2, 4}, {-2, -2}, {0, -6}, {-6, 0}, {4, -2}},
// 128x64
{{-2, 0}, {0, -2}, {-2, 8}, {4, -2}, {-2, -2}, {-6, 0}, {0, -6}, {-2, 4}},
// 128x128
{{-2, 6}, {6, -2}, {-2, 8}, {8, -2}, {-2, -2}, {-2, 0}, {0, -2}, {-2, 12}},
#endif // CONFIG_EXT_PARTITION
}; };
static const int idx_n_column_to_subblock[4][2] = { static const int idx_n_column_to_subblock[4][2] = {
@@ -131,7 +140,11 @@ static const int idx_n_column_to_subblock[4][2] = {
}; };
// clamp_mv_ref // clamp_mv_ref
#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units #if CONFIG_EXT_PARTITION
# define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
#else
# define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units
#endif // CONFIG_EXT_PARTITION
static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER, clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,

View File

@@ -332,7 +332,7 @@ typedef struct VP10Common {
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context; TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT left_txfm_context[8]; TXFM_CONTEXT left_txfm_context[MI_BLOCK_SIZE];
#endif #endif
int above_context_alloc_cols; int above_context_alloc_cols;
@@ -440,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
const int above_idx = mi_col * 2; const int above_idx = mi_col * 2;
const int left_idx = (mi_row * 2) & 15; // FIXME: Mask should be CU_SIZE*2-1 const int left_idx = (mi_row * 2) & MI_MASK_2;
int i; int i;
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i];

View File

@@ -454,52 +454,52 @@ void vp10_make_masked_inter_predictor(
const MACROBLOCKD *xd) { const MACROBLOCKD *xd) {
const MODE_INFO *mi = xd->mi[0]; const MODE_INFO *mi = xd->mi[0];
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; uint8_t tmp_dst_[2 * MAX_SB_SQUARE];
uint8_t *tmp_dst = uint8_t *tmp_dst =
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
subpel_x, subpel_y, sf, w, h, 0, subpel_x, subpel_y, sf, w, h, 0,
interp_filter, xs, ys, xd); interp_filter, xs, ys, xd);
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_extend_highbd( build_masked_compound_extend_highbd(
dst, dst_stride, tmp_dst, CU_SIZE, plane, dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w); wedge_offset_y, wedge_offset_x, h, w);
else else
build_masked_compound_extend( build_masked_compound_extend(
dst, dst_stride, tmp_dst, CU_SIZE, plane, dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w); wedge_offset_y, wedge_offset_x, h, w);
#else #else
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd( build_masked_compound_highbd(
dst, dst_stride, tmp_dst, CU_SIZE, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w); mi->mbmi.sb_type, h, w);
else else
build_masked_compound( build_masked_compound(
dst, dst_stride, tmp_dst, CU_SIZE, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w); mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
#else // CONFIG_VP9_HIGHBITDEPTH #else // CONFIG_VP9_HIGHBITDEPTH
uint8_t tmp_dst[CU_SIZE * CU_SIZE]; uint8_t tmp_dst[MAX_SB_SQUARE];
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
subpel_x, subpel_y, sf, w, h, 0, subpel_x, subpel_y, sf, w, h, 0,
interp_filter, xs, ys, xd); interp_filter, xs, ys, xd);
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
build_masked_compound_extend( build_masked_compound_extend(
dst, dst_stride, tmp_dst, CU_SIZE, plane, dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w); wedge_offset_y, wedge_offset_x, h, w);
#else #else
build_masked_compound( build_masked_compound(
dst, dst_stride, tmp_dst, CU_SIZE, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w); mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
@@ -877,12 +877,13 @@ void vp10_build_masked_inter_predictor_complex(
int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
PARTITION_TYPE partition, int plane) { PARTITION_TYPE partition, int plane) {
int i, j; int i, j;
uint8_t mask[MAXTXLEN]; uint8_t mask[MAX_TX_SIZE];
int top_w = 4 << b_width_log2_lookup[top_bsize], int top_w = 4 << b_width_log2_lookup[top_bsize];
top_h = 4 << b_height_log2_lookup[top_bsize]; int top_h = 4 << b_height_log2_lookup[top_bsize];
int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize]; int w = 4 << b_width_log2_lookup[bsize];
int w_offset = (mi_col - mi_col_ori) << 3, int h = 4 << b_height_log2_lookup[bsize];
h_offset = (mi_row - mi_row_ori) << 3; int w_offset = (mi_col - mi_col_ori) * MI_SIZE;
int h_offset = (mi_row - mi_row_ori) * MI_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
uint16_t *dst16= CONVERT_TO_SHORTPTR(dst); uint16_t *dst16= CONVERT_TO_SHORTPTR(dst);
@@ -890,6 +891,8 @@ void vp10_build_masked_inter_predictor_complex(
int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
assert(bsize <= BLOCK_32X32);
top_w >>= pd->subsampling_x; top_w >>= pd->subsampling_x;
top_h >>= pd->subsampling_y; top_h >>= pd->subsampling_y;
w >>= pd->subsampling_x; w >>= pd->subsampling_x;
@@ -916,7 +919,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0) if (m == 0)
dst_tmp[j] = dst2_tmp[j]; dst_tmp[j] = dst2_tmp[j];
else else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
} }
dst_tmp += dst_stride; dst_tmp += dst_stride;
dst2_tmp += dst2_stride; dst2_tmp += dst2_stride;
@@ -943,7 +947,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0) if (m == 0)
dst_tmp[j] = dst2_tmp[j]; dst_tmp[j] = dst2_tmp[j];
else else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
} }
dst_tmp += dst_stride; dst_tmp += dst_stride;
dst2_tmp += dst2_stride; dst2_tmp += dst2_stride;
@@ -978,7 +983,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0) if (m == 0)
dst_tmp[j] = dst2_tmp[j]; dst_tmp[j] = dst2_tmp[j];
else else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
} }
memcpy(dst_tmp + j, dst2_tmp + j, memcpy(dst_tmp + j, dst2_tmp + j,
(top_w - w_offset - w) * sizeof(uint16_t)); (top_w - w_offset - w) * sizeof(uint16_t));
@@ -1001,7 +1007,8 @@ void vp10_build_masked_inter_predictor_complex(
if (m == 0) if (m == 0)
dst_tmp[j] = dst2_tmp[j]; dst_tmp[j] = dst2_tmp[j];
else else
dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; dst_tmp[j] = ROUND_POWER_OF_TWO(dst_tmp[j] * m +
dst2_tmp[j] * (64 - m), 6);
} }
memcpy(dst_tmp + j, dst2_tmp + j, memcpy(dst_tmp + j, dst2_tmp + j,
(top_w - w_offset - w) * sizeof(uint8_t)); (top_w - w_offset - w) * sizeof(uint8_t));
@@ -1158,12 +1165,39 @@ static const uint8_t obmc_mask_16[2][16] = {
}; };
static const uint8_t obmc_mask_32[2][32] = { static const uint8_t obmc_mask_32[2][32] = {
{ 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, { 33, 35, 36, 38, 40, 41, 43, 44,
56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64}, 45, 47, 48, 50, 51, 52, 53, 55,
{ 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9, 56, 57, 58, 59, 60, 60, 61, 62,
8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0} 62, 63, 63, 64, 64, 64, 64, 64 },
{ 31, 29, 28, 26, 24, 23, 21, 20,
19, 17, 16, 14, 13, 12, 11, 9,
8, 7, 6, 5, 4, 4, 3, 2,
2, 1, 1, 0, 0, 0, 0, 0 }
}; };
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct values here?
static const uint8_t obmc_mask_64[2][64] = {
{ 33, 33, 35, 35, 36, 36, 38, 38,
40, 40, 41, 41, 43, 43, 44, 44,
45, 45, 47, 47, 48, 48, 50, 50,
51, 51, 52, 52, 53, 53, 55, 55,
56, 56, 57, 57, 58, 58, 59, 59,
60, 60, 60, 60, 61, 61, 62, 62,
62, 62, 63, 63, 63, 63, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64 },
{ 31, 31, 29, 29, 28, 28, 26, 26,
24, 24, 23, 23, 21, 21, 20, 20,
19, 19, 17, 17, 16, 16, 14, 14,
13, 13, 12, 12, 11, 11, 9, 9,
8, 8, 7, 7, 6, 6, 5, 5,
4, 4, 4, 4, 3, 3, 2, 2,
2, 2, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 }
};
#endif // CONFIG_EXT_PARTITION
void setup_obmc_mask(int length, const uint8_t *mask[2]) { void setup_obmc_mask(int length, const uint8_t *mask[2]) {
switch (length) { switch (length) {
case 1: case 1:
@@ -1190,9 +1224,15 @@ void setup_obmc_mask(int length, const uint8_t *mask[2]) {
mask[0] = obmc_mask_32[0]; mask[0] = obmc_mask_32[0];
mask[1] = obmc_mask_32[1]; mask[1] = obmc_mask_32[1];
break; break;
#if CONFIG_EXT_PARTITION
case 64:
mask[0] = obmc_mask_64[0];
mask[1] = obmc_mask_64[1];
break;
#endif // CONFIG_EXT_PARTITION
default: default:
mask[0] = obmc_mask_32[0]; mask[0] = NULL;
mask[1] = obmc_mask_32[1]; mask[1] = NULL;
assert(0); assert(0);
break; break;
} }
@@ -1265,15 +1305,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (plane = 0; plane < MAX_MB_PLANE; ++plane) { for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane]; const struct macroblockd_plane *pd = &xd->plane[plane];
int bw = (mi_step * 8) >> pd->subsampling_x; int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y; int bh = overlap >> pd->subsampling_y;
int row, col; int row, col;
int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
uint8_t *dst = use_tmp_dst_buf ? uint8_t *dst = use_tmp_dst_buf ?
&final_buf[plane][(i * 8) >> pd->subsampling_x] : &final_buf[plane][(i * MI_SIZE) >> pd->subsampling_x] :
&pd->dst.buf[(i * 8) >> pd->subsampling_x]; &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
int tmp_stride = tmp_stride1[plane]; int tmp_stride = tmp_stride1[plane];
uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x]; uint8_t *tmp = &tmp_buf1[plane][(i * MI_SIZE) >> pd->subsampling_x];
const uint8_t *mask[2]; const uint8_t *mask[2];
setup_obmc_mask(bh, mask); setup_obmc_mask(bh, mask);
@@ -1285,8 +1325,9 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (row = 0; row < bh; ++row) { for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) for (col = 0; col < bw; ++col)
dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col] dst16[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst16[col] +
+ 32) >> 6; mask[1][row] * tmp16[col], 6);
dst16 += dst_stride; dst16 += dst_stride;
tmp16 += tmp_stride; tmp16 += tmp_stride;
} }
@@ -1294,8 +1335,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) { for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) for (col = 0; col < bw; ++col)
dst[col] = (mask[0][row] * dst[col] + mask[1][row] * tmp[col] + 32) dst[col] = ROUND_POWER_OF_TWO(mask[0][row] * dst[col] +
>> 6; mask[1][row] * tmp[col], 6);
dst += dst_stride; dst += dst_stride;
tmp += tmp_stride; tmp += tmp_stride;
} }
@@ -1332,15 +1373,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (plane = 0; plane < MAX_MB_PLANE; ++plane) { for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane]; const struct macroblockd_plane *pd = &xd->plane[plane];
int bw = overlap >> pd->subsampling_x; int bw = overlap >> pd->subsampling_x;
int bh = (mi_step * 8) >> pd->subsampling_y; int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
int row, col; int row, col;
int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
uint8_t *dst = use_tmp_dst_buf ? uint8_t *dst = use_tmp_dst_buf ?
&final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] : &final_buf[plane][(i * MI_SIZE * dst_stride) >> pd->subsampling_y] :
&pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y]; &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
int tmp_stride = tmp_stride2[plane]; int tmp_stride = tmp_stride2[plane];
uint8_t *tmp = &tmp_buf2[plane] uint8_t *tmp = &tmp_buf2[plane]
[(i * 8 * tmp_stride) >> pd->subsampling_y]; [(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
const uint8_t *mask[2]; const uint8_t *mask[2];
setup_obmc_mask(bw, mask); setup_obmc_mask(bw, mask);
@@ -1352,8 +1393,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
for (row = 0; row < bh; ++row) { for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) for (col = 0; col < bw; ++col)
dst16[col] = (mask[0][col] * dst16[col] + mask[1][col] * tmp16[col] dst16[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst16[col] +
+ 32) >> 6; mask[1][col] * tmp16[col], 6);
dst16 += dst_stride; dst16 += dst_stride;
tmp16 += tmp_stride; tmp16 += tmp_stride;
} }
@@ -1361,8 +1402,8 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) { for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) for (col = 0; col < bw; ++col)
dst[col] = (mask[0][col] * dst[col] + mask[1][col] * tmp[col] + 32) dst[col] = ROUND_POWER_OF_TWO(mask[0][col] * dst[col] +
>> 6; mask[1][col] * tmp[col], 6);
dst += dst_stride; dst += dst_stride;
tmp += tmp_stride; tmp += tmp_stride;
} }
@@ -1572,7 +1613,31 @@ static void combine_interintra(PREDICTION_MODE mode,
static const int scale_bits = 8; static const int scale_bits = 8;
static const int scale_max = 256; static const int scale_max = 256;
static const int scale_round = 127; static const int scale_round = 127;
static const int weights1d[64] = { #if CONFIG_EXT_PARTITION
// TODO(debargha): Fill in the correct weights for 128 wide blocks.
static const int weights1d[MAX_SB_SIZE] = {
128, 128, 125, 125, 122, 122, 119, 119,
116, 116, 114, 114, 111, 111, 109, 109,
107, 107, 105, 105, 103, 103, 101, 101,
99, 99, 97, 97, 96, 96, 94, 94,
93, 93, 91, 91, 90, 90, 89, 89,
88, 88, 86, 86, 85, 85, 84, 84,
83, 83, 82, 82, 81, 81, 81, 81,
80, 80, 79, 79, 78, 78, 78, 78,
77, 77, 76, 76, 76, 76, 75, 75,
75, 75, 74, 74, 74, 74, 73, 73,
73, 73, 72, 72, 72, 72, 71, 71,
71, 71, 71, 71, 70, 70, 70, 70,
70, 70, 70, 70, 69, 69, 69, 69,
69, 69, 69, 69, 68, 68, 68, 68,
68, 68, 68, 68, 68, 68, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
};
static int size_scales[BLOCK_SIZES] = {
32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#else
static const int weights1d[MAX_SB_SIZE] = {
128, 125, 122, 119, 116, 114, 111, 109, 128, 125, 122, 119, 116, 114, 111, 109,
107, 105, 103, 101, 99, 97, 96, 94, 107, 105, 103, 101, 99, 97, 96, 94,
93, 91, 90, 89, 88, 86, 85, 84, 93, 91, 90, 89, 88, 86, 85, 84,
@@ -1582,14 +1647,14 @@ static void combine_interintra(PREDICTION_MODE mode,
70, 70, 69, 69, 69, 69, 68, 68, 70, 70, 69, 69, 69, 69, 68, 68,
68, 68, 68, 67, 67, 67, 67, 67, 68, 68, 68, 67, 67, 67, 67, 67,
}; };
const int bw = 4 << b_width_log2_lookup[plane_bsize]; static int size_scales[BLOCK_SIZES] = {
const int bh = 4 << b_height_log2_lookup[plane_bsize]; 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#endif // CONFIG_EXT_PARTITION
int size = VPXMAX(bw, bh); const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int size_scale = (size >= 64 ? 1 : const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
size == 32 ? 2 : const int size_scale = size_scales[plane_bsize];
size == 16 ? 4 :
size == 8 ? 8 : 16);
int i, j; int i, j;
if (use_wedge_interintra && get_wedge_bits(bsize)) { if (use_wedge_interintra && get_wedge_bits(bsize)) {
@@ -1712,7 +1777,31 @@ static void combine_interintra_highbd(PREDICTION_MODE mode,
static const int scale_bits = 8; static const int scale_bits = 8;
static const int scale_max = 256; static const int scale_max = 256;
static const int scale_round = 127; static const int scale_round = 127;
static const int weights1d[64] = { #if CONFIG_EXT_PARTITION
// TODO(debargha): Fill in the correct weights for 128 wide blocks.
static const int weights1d[MAX_SB_SIZE] = {
128, 128, 125, 125, 122, 122, 119, 119,
116, 116, 114, 114, 111, 111, 109, 109,
107, 107, 105, 105, 103, 103, 101, 101,
99, 99, 97, 97, 96, 96, 94, 94,
93, 93, 91, 91, 90, 90, 89, 89,
88, 88, 86, 86, 85, 85, 84, 84,
83, 83, 82, 82, 81, 81, 81, 81,
80, 80, 79, 79, 78, 78, 78, 78,
77, 77, 76, 76, 76, 76, 75, 75,
75, 75, 74, 74, 74, 74, 73, 73,
73, 73, 72, 72, 72, 72, 71, 71,
71, 71, 71, 71, 70, 70, 70, 70,
70, 70, 70, 70, 69, 69, 69, 69,
69, 69, 69, 69, 68, 68, 68, 68,
68, 68, 68, 68, 68, 68, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
};
static int size_scales[BLOCK_SIZES] = {
32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#else
static const int weights1d[MAX_SB_SIZE] = {
128, 125, 122, 119, 116, 114, 111, 109, 128, 125, 122, 119, 116, 114, 111, 109,
107, 105, 103, 101, 99, 97, 96, 94, 107, 105, 103, 101, 99, 97, 96, 94,
93, 91, 90, 89, 88, 86, 85, 84, 93, 91, 90, 89, 88, 86, 85, 84,
@@ -1722,15 +1811,16 @@ static void combine_interintra_highbd(PREDICTION_MODE mode,
70, 70, 69, 69, 69, 69, 68, 68, 70, 70, 69, 69, 69, 69, 68, 68,
68, 68, 68, 67, 67, 67, 67, 67, 68, 68, 68, 67, 67, 67, 67, 67,
}; };
const int bw = 4 << b_width_log2_lookup[plane_bsize]; static int size_scales[BLOCK_SIZES] = {
const int bh = 4 << b_height_log2_lookup[plane_bsize]; 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1
};
#endif // CONFIG_EXT_PARTITION
int size = VPXMAX(bw, bh); const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int size_scale = (size >= 64 ? 1 : const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
size == 32 ? 2 : const int size_scale = size_scales[plane_bsize];
size == 16 ? 4 :
size == 8 ? 8 : 16);
int i, j; int i, j;
uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8); uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8); uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8); uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
@@ -1889,8 +1979,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd,
const int bw = 4 << b_width_log2_lookup[bsize]; const int bw = 4 << b_width_log2_lookup[bsize];
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
intrapredictor[CU_SIZE * CU_SIZE]);
build_intra_predictors_for_interintra( build_intra_predictors_for_interintra(
xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
CONVERT_TO_BYTEPTR(intrapredictor), bw, CONVERT_TO_BYTEPTR(intrapredictor), bw,
@@ -1907,7 +1996,7 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd,
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
{ {
uint8_t intrapredictor[CU_SIZE * CU_SIZE]; uint8_t intrapredictor[MAX_SB_SQUARE];
build_intra_predictors_for_interintra( build_intra_predictors_for_interintra(
xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride,
intrapredictor, bw, intrapredictor, bw,
@@ -1931,8 +2020,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd,
const int bw = 4 << b_width_log2_lookup[uvbsize]; const int bw = 4 << b_width_log2_lookup[uvbsize];
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
uintrapredictor[CU_SIZE * CU_SIZE]);
build_intra_predictors_for_interintra( build_intra_predictors_for_interintra(
xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
CONVERT_TO_BYTEPTR(uintrapredictor), bw, CONVERT_TO_BYTEPTR(uintrapredictor), bw,
@@ -1950,7 +2038,7 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd,
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
{ {
uint8_t uintrapredictor[CU_SIZE * CU_SIZE]; uint8_t uintrapredictor[MAX_SB_SQUARE];
build_intra_predictors_for_interintra( build_intra_predictors_for_interintra(
xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
uintrapredictor, bw, uintrapredictor, bw,
@@ -2117,30 +2205,30 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
if (ref && get_wedge_bits(mi->mbmi.sb_type) if (ref && get_wedge_bits(mi->mbmi.sb_type)
&& mi->mbmi.use_wedge_interinter) { && mi->mbmi.use_wedge_interinter) {
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; uint8_t tmp_dst_[2 * MAX_SB_SQUARE];
uint8_t *tmp_dst = uint8_t *tmp_dst =
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
#else #else
uint8_t tmp_dst[CU_SIZE * CU_SIZE]; uint8_t tmp_dst[MAX_SB_SQUARE];
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int k; int k;
for (k = 0; k < h; ++k) for (k = 0; k < h; ++k)
memcpy(tmp_dst_ + 2 * CU_SIZE * k, ext_dst1 + memcpy(tmp_dst_ + 2 * MAX_SB_SIZE * k, ext_dst1 +
ext_dst_stride1 * 2 * k, w * 2); ext_dst_stride1 * 2 * k, w * 2);
} else { } else {
int k; int k;
for (k = 0; k < h; ++k) for (k = 0; k < h; ++k)
memcpy(tmp_dst_ + CU_SIZE * k, ext_dst1 + memcpy(tmp_dst_ + MAX_SB_SIZE * k, ext_dst1 +
ext_dst_stride1 * k, w); ext_dst_stride1 * k, w);
} }
#else #else
{ {
int k; int k;
for (k = 0; k < h; ++k) for (k = 0; k < h; ++k)
memcpy(tmp_dst + CU_SIZE * k, ext_dst1 + memcpy(tmp_dst + MAX_SB_SIZE * k, ext_dst1 +
ext_dst_stride1 * k, w); ext_dst_stride1 * k, w);
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -2149,20 +2237,20 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_masked_compound_extend_highbd( build_masked_compound_extend_highbd(
dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w); wedge_offset_y, wedge_offset_x, h, w);
} else { } else {
build_masked_compound_extend( build_masked_compound_extend(
dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w); wedge_offset_y, wedge_offset_x, h, w);
} }
#else #else
build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, build_masked_compound_extend(dst, dst_buf->stride, tmp_dst,
CU_SIZE, plane, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w); wedge_offset_y, wedge_offset_x, h, w);
@@ -2171,12 +2259,12 @@ static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst,
CU_SIZE, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w); mi->mbmi.sb_type, h, w);
else else
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
build_masked_compound(dst, dst_buf->stride, tmp_dst, CU_SIZE, build_masked_compound(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_index,
mi->mbmi.sb_type, h, w); mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX

View File

@@ -443,8 +443,8 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm,
#endif // CONFIG_OBMC #endif // CONFIG_OBMC
#if CONFIG_EXT_INTER #if CONFIG_EXT_INTER
#define MASK_MASTER_SIZE (2 * CU_SIZE) #define MASK_MASTER_SIZE (2 * MAX_SB_SIZE)
#define MASK_MASTER_STRIDE (2 * CU_SIZE) #define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE)
void vp10_init_wedge_masks(); void vp10_init_wedge_masks();

View File

@@ -44,30 +44,30 @@ static const uint8_t extend_modes[INTRA_MODES] = {
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
}; };
static const uint8_t orders_64x64[1] = { 0 }; static const uint8_t orders_128x128[1] = { 0 };
static const uint8_t orders_64x32[2] = { 0, 1 }; static const uint8_t orders_128x64[2] = { 0, 1 };
static const uint8_t orders_32x64[2] = { 0, 1 }; static const uint8_t orders_64x128[2] = { 0, 1 };
static const uint8_t orders_32x32[4] = { static const uint8_t orders_64x64[4] = {
0, 1, 0, 1,
2, 3, 2, 3,
}; };
static const uint8_t orders_32x16[8] = { static const uint8_t orders_64x32[8] = {
0, 2, 0, 2,
1, 3, 1, 3,
4, 6, 4, 6,
5, 7, 5, 7,
}; };
static const uint8_t orders_16x32[8] = { static const uint8_t orders_32x64[8] = {
0, 1, 2, 3, 0, 1, 2, 3,
4, 5, 6, 7, 4, 5, 6, 7,
}; };
static const uint8_t orders_16x16[16] = { static const uint8_t orders_32x32[16] = {
0, 1, 4, 5, 0, 1, 4, 5,
2, 3, 6, 7, 2, 3, 6, 7,
8, 9, 12, 13, 8, 9, 12, 13,
10, 11, 14, 15, 10, 11, 14, 15,
}; };
static const uint8_t orders_16x8[32] = { static const uint8_t orders_32x16[32] = {
0, 2, 8, 10, 0, 2, 8, 10,
1, 3, 9, 11, 1, 3, 9, 11,
4, 6, 12, 14, 4, 6, 12, 14,
@@ -77,13 +77,13 @@ static const uint8_t orders_16x8[32] = {
20, 22, 28, 30, 20, 22, 28, 30,
21, 23, 29, 31, 21, 23, 29, 31,
}; };
static const uint8_t orders_8x16[32] = { static const uint8_t orders_16x32[32] = {
0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11,
4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15,
16, 17, 18, 19, 24, 25, 26, 27, 16, 17, 18, 19, 24, 25, 26, 27,
20, 21, 22, 23, 28, 29, 30, 31, 20, 21, 22, 23, 28, 29, 30, 31,
}; };
static const uint8_t orders_8x8[64] = { static const uint8_t orders_16x16[64] = {
0, 1, 4, 5, 16, 17, 20, 21, 0, 1, 4, 5, 16, 17, 20, 21,
2, 3, 6, 7, 18, 19, 22, 23, 2, 3, 6, 7, 18, 19, 22, 23,
8, 9, 12, 13, 24, 25, 28, 29, 8, 9, 12, 13, 24, 25, 28, 29,
@@ -93,24 +93,96 @@ static const uint8_t orders_8x8[64] = {
40, 41, 44, 45, 56, 57, 60, 61, 40, 41, 44, 45, 56, 57, 60, 61,
42, 43, 46, 47, 58, 59, 62, 63, 42, 43, 46, 47, 58, 59, 62, 63,
}; };
static const uint8_t *const orders[BLOCK_SIZES] = {
orders_8x8, orders_8x8, orders_8x8, orders_8x8, #if CONFIG_EXT_PARTITION
orders_8x16, orders_16x8, orders_16x16, static const uint8_t orders_16x8[128] = {
orders_16x32, orders_32x16, orders_32x32, 0, 2, 8, 10, 32, 34, 40, 42,
orders_32x64, orders_64x32, orders_64x64, 1, 3, 9, 11, 33, 35, 41, 43,
4, 6, 12, 14, 36, 38, 44, 46,
5, 7, 13, 15, 37, 39, 45, 47,
16, 18, 24, 26, 48, 50, 56, 58,
17, 19, 25, 27, 49, 51, 57, 59,
20, 22, 28, 30, 52, 54, 60, 62,
21, 23, 29, 31, 53, 55, 61, 63,
64, 66, 72, 74, 96, 98, 104, 106,
65, 67, 73, 75, 97, 99, 105, 107,
68, 70, 76, 78, 100, 102, 108, 110,
69, 71, 77, 79, 101, 103, 109, 111,
80, 82, 88, 90, 112, 114, 120, 122,
81, 83, 89, 91, 113, 115, 121, 123,
84, 86, 92, 94, 116, 118, 124, 126,
85, 87, 93, 95, 117, 119, 125, 127,
}; };
static const uint8_t orders_8x16[128] = {
0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43,
4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47,
16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59,
20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63,
64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107,
68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111,
80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123,
84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127,
};
static const uint8_t orders_8x8[256] = {
0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85,
2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87,
8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, 89, 92, 93,
10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, 90, 91, 94, 95,
32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, 101, 112, 113, 116, 117,
34, 35, 38, 39, 50, 51, 54, 55, 98, 99, 102, 103, 114, 115, 118, 119,
40, 41, 44, 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125,
42, 43, 46, 47, 58, 59, 62, 63, 106, 107, 110, 111, 122, 123, 126, 127,
128, 129, 132, 133, 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213,
130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198, 199, 210, 211, 214, 215,
136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221,
138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223,
160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245,
162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247,
168, 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253,
170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255,
};
static const uint8_t *const orders[BLOCK_SIZES] = {
// 4X4
orders_8x8,
// 4X8, 8X4, 8X8
orders_8x8, orders_8x8, orders_8x8,
// 8X16, 16X8, 16X16
orders_8x16, orders_16x8, orders_16x16,
// 16X32, 32X16, 32X32
orders_16x32, orders_32x16, orders_32x32,
// 32X64, 64X32, 64X64
orders_32x64, orders_64x32, orders_64x64,
// 64x128, 128x64, 128x128
orders_64x128, orders_128x64, orders_128x128
};
#else
static const uint8_t *const orders[BLOCK_SIZES] = {
// 4X4
orders_16x16,
// 4X8, 8X4, 8X8
orders_16x16, orders_16x16, orders_16x16,
// 8X16, 16X8, 16X16
orders_16x32, orders_32x16, orders_32x32,
// 16X32, 32X16, 32X32
orders_32x64, orders_64x32, orders_64x64,
// 32X64, 64X32, 64X64
orders_64x128, orders_128x64, orders_128x128
};
#endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES #if CONFIG_EXT_PARTITION_TYPES
static const uint8_t orders_verta_32x32[4] = { static const uint8_t orders_verta_64x64[4] = {
0, 2, 0, 2,
1, 2, 1, 2,
}; };
static const uint8_t orders_verta_16x16[16] = { static const uint8_t orders_verta_32x32[16] = {
0, 2, 4, 6, 0, 2, 4, 6,
1, 2, 5, 6, 1, 2, 5, 6,
8, 10, 12, 14, 8, 10, 12, 14,
9, 10, 13, 14, 9, 10, 13, 14,
}; };
static const uint8_t orders_verta_8x8[64] = { static const uint8_t orders_verta_16x16[64] = {
0, 2, 4, 6, 16, 18, 20, 22, 0, 2, 4, 6, 16, 18, 20, 22,
1, 2, 5, 6, 17, 18, 21, 22, 1, 2, 5, 6, 17, 18, 21, 22,
8, 10, 12, 14, 24, 26, 28, 30, 8, 10, 12, 14, 24, 26, 28, 30,
@@ -120,12 +192,53 @@ static const uint8_t orders_verta_8x8[64] = {
40, 42, 44, 46, 56, 58, 60, 62, 40, 42, 44, 46, 56, 58, 60, 62,
41, 42, 45, 46, 57, 58, 61, 62, 41, 42, 45, 46, 57, 58, 61, 62,
}; };
static const uint8_t *const orders_verta[BLOCK_SIZES] = { #if CONFIG_EXT_PARTITION
orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, orders_verta_8x8, static const uint8_t orders_verta_8x8[256] = {
orders_8x16, orders_16x8, orders_verta_16x16, 0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84, 86,
orders_16x32, orders_32x16, orders_verta_32x32, 1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82, 85, 86,
orders_32x64, orders_64x32, orders_64x64, 8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88, 90, 92, 94,
9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78, 89, 90, 93, 94,
32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100, 102, 112, 114, 116, 118,
33, 34, 37, 38, 49, 50, 53, 54, 97, 98, 101, 102, 113, 114, 117, 118,
40, 42, 44, 46, 56, 58, 60, 62, 104, 106, 108, 110, 120, 122, 124, 126,
41, 42, 45, 46, 57, 58, 61, 62, 105, 106, 109, 110, 121, 122, 125, 126,
128, 130, 132, 134, 144, 146, 148, 150, 192, 194, 196, 198, 208, 210, 212, 214,
129, 130, 133, 134, 145, 146, 149, 150, 193, 194, 197, 198, 209, 210, 213, 214,
136, 138, 140, 142, 152, 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222,
137, 138, 141, 142, 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222,
160, 162, 164, 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246,
161, 162, 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246,
168, 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254,
169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253, 254,
}; };
static const uint8_t *const orders_verta[BLOCK_SIZES] = {
// 4X4
orders_verta_8x8,
// 4X8, 8X4, 8X8
orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
// 8X16, 16X8, 16X16
orders_8x16, orders_16x8, orders_verta_16x16,
// 16X32, 32X16, 32X32
orders_16x32, orders_32x16, orders_verta_32x32,
// 32X64, 64X32, 64X64
orders_32x64, orders_64x32, orders_verta_64x64,
// 64x128, 128x64, 128x128
orders_64x128, orders_128x64, orders_128x128
};
#else
static const uint8_t *const orders_verta[BLOCK_SIZES] = {
// 4X4
orders_verta_16x16,
// 4X8, 8X4, 8X8
orders_verta_16x16, orders_verta_16x16, orders_verta_16x16,
// 8X16, 16X8, 16X16
orders_16x32, orders_32x16, orders_verta_32x32,
// 16X32, 32X16, 32X32
orders_32x64, orders_64x32, orders_verta_64x64,
// 32X64, 64X32, 64X64
orders_64x128, orders_128x64, orders_128x128
};
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_EXT_PARTITION_TYPES #endif // CONFIG_EXT_PARTITION_TYPES
static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
@@ -159,19 +272,21 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
if (x + step < w) if (x + step < w)
return 1; return 1;
mi_row = (mi_row & 7) >> hl; mi_row = (mi_row & MI_MASK) >> hl;
mi_col = (mi_col & 7) >> wl; mi_col = (mi_col & MI_MASK) >> wl;
// If top row of coding unit // If top row of coding unit
if (mi_row == 0) if (mi_row == 0)
return 1; return 1;
// If rightmost column of coding unit // If rightmost column of coding unit
if (((mi_col + 1) << wl) >= 8) if (((mi_col + 1) << wl) >= MI_BLOCK_SIZE)
return 0; return 0;
my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0]; my_order =
tr_order = order[((mi_row - 1) << (3 - wl)) + mi_col + 1]; order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0];
tr_order =
order[((mi_row - 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 1];
return my_order > tr_order; return my_order > tr_order;
} else { } else {
@@ -200,17 +315,17 @@ static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col,
if (y + step < h) if (y + step < h)
return 1; return 1;
mi_row = (mi_row & 7) >> hl; mi_row = (mi_row & MI_MASK) >> hl;
mi_col = (mi_col & 7) >> wl; mi_col = (mi_col & MI_MASK) >> wl;
if (mi_col == 0) if (mi_col == 0)
return (mi_row << (hl + !ss_y)) + y + step < (8 << !ss_y); return (mi_row << (hl + !ss_y)) + y + step < (MI_BLOCK_SIZE << !ss_y);
if (((mi_row + 1) << hl) >= 8) if (((mi_row + 1) << hl) >= MI_BLOCK_SIZE)
return 0; return 0;
my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0]; my_order = order[((mi_row + 0) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col + 0];
bl_order = order[((mi_row + 1) << (3 - wl)) + mi_col - 1]; bl_order = order[((mi_row + 1) << (MI_BLOCK_SIZE_LOG2 - wl)) + mi_col - 1];
return bl_order < my_order; return bl_order < my_order;
} }
@@ -336,8 +451,8 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
if (filter_type != INTRA_FILTER_LINEAR) { if (filter_type != INTRA_FILTER_LINEAR) {
const int pad_size = SUBPEL_TAPS >> 1; const int pad_size = SUBPEL_TAPS >> 1;
int len; int len;
DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][64]); DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint8_t, src[64 + SUBPEL_TAPS]); DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]);
uint8_t flags[SUBPEL_SHIFTS]; uint8_t flags[SUBPEL_SHIFTS];
memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
@@ -467,8 +582,8 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
if (filter_type != INTRA_FILTER_LINEAR) { if (filter_type != INTRA_FILTER_LINEAR) {
const int pad_size = SUBPEL_TAPS >> 1; const int pad_size = SUBPEL_TAPS >> 1;
int len, i; int len, i;
DECLARE_ALIGNED(16, uint8_t, buf[64][4 * SUBPEL_SHIFTS]); DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]);
DECLARE_ALIGNED(16, uint8_t, src[(64 + SUBPEL_TAPS) * 4]); DECLARE_ALIGNED(16, uint8_t, src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]);
uint8_t flags[SUBPEL_SHIFTS]; uint8_t flags[SUBPEL_SHIFTS];
memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
@@ -1063,8 +1178,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
int i; int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
DECLARE_ALIGNED(16, uint16_t, left_col[64]); DECLARE_ALIGNED(16, uint16_t, left_col[MAX_SB_SIZE]);
DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
uint16_t *above_row = above_data + 16; uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row; const uint16_t *const_above_row = above_row;
const int bs = 4 << tx_size; const int bs = 4 << tx_size;
@@ -1220,9 +1335,9 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int n_left_px, int n_bottomleft_px, int n_left_px, int n_bottomleft_px,
int plane) { int plane) {
int i; int i;
DECLARE_ALIGNED(16, uint8_t, left_col[64]); DECLARE_ALIGNED(16, uint8_t, left_col[MAX_SB_SIZE]);
const uint8_t *above_ref = ref - ref_stride; const uint8_t *above_ref = ref - ref_stride;
DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
uint8_t *above_row = above_data + 16; uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row; const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size; const int bs = 4 << tx_size;

View File

@@ -109,6 +109,12 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
path = LF_PATH_SLOW; path = LF_PATH_SLOW;
#endif // !CONFIG_EXT_PARTITION_TYPES #endif // !CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_PARTITION
printf("STOPPING: This code has not been modified to work with the "
"extended coding unit size experiment");
exit(EXIT_FAILURE);
#endif // CONFIG_EXT_PARTITION
for (mi_row = start; mi_row < stop; for (mi_row = start; mi_row < stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
@@ -176,6 +182,12 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
const int num_workers = VPXMIN(nworkers, tile_cols); const int num_workers = VPXMIN(nworkers, tile_cols);
int i; int i;
#if CONFIG_EXT_PARTITION
printf("STOPPING: This code has not been modified to work with the "
"extended coding unit size experiment");
exit(EXIT_FAILURE);
#endif // CONFIG_EXT_PARTITION
if (!lf_sync->sync_range || sb_rows != lf_sync->rows || if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
num_workers > lf_sync->num_workers) { num_workers > lf_sync->num_workers) {
vp10_loop_filter_dealloc(lf_sync); vp10_loop_filter_dealloc(lf_sync);

View File

@@ -5,8 +5,8 @@
#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#define MAX_BLOCK_WIDTH (64) #define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (64) #define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
#define MAX_STEP (32) #define MAX_STEP (32)
#define MAX_FILTER_TAP (12) #define MAX_FILTER_TAP (12)

View File

@@ -489,7 +489,7 @@ static void extend_and_predict_highbd(const uint8_t *buf_ptr1,
MACROBLOCKD *xd, MACROBLOCKD *xd,
int w, int h, int ref, int xs, int ys) { int w, int h, int ref, int xs, int ys) {
DECLARE_ALIGNED(16, uint16_t, DECLARE_ALIGNED(16, uint16_t,
mc_buf_high[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); mc_buf_high[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]);
const uint8_t *buf_ptr; const uint8_t *buf_ptr;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -535,7 +535,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX #endif // CONFIG_EXT_INTER && CONFIG_SUPERTX
MACROBLOCKD *xd, MACROBLOCKD *xd,
int w, int h, int ref, int xs, int ys) { int w, int h, int ref, int xs, int ys) {
DECLARE_ALIGNED(16, uint8_t, mc_buf[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); DECLARE_ALIGNED(16, uint8_t,
mc_buf[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]);
const uint8_t *buf_ptr; const uint8_t *buf_ptr;
build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w,
@@ -1093,7 +1094,7 @@ static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
} }
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
xd->above_txfm_context = cm->above_txfm_context + mi_col; xd->above_txfm_context = cm->above_txfm_context + mi_col;
xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh); set_txfm_ctx(xd->left_txfm_context, xd->mi[0]->mbmi.tx_size, bh);
set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw); set_txfm_ctx(xd->above_txfm_context, xd->mi[0]->mbmi.tx_size, bw);
#endif #endif
@@ -1304,38 +1305,38 @@ static void dec_predict_sb_complex(VP10Decoder *const pbi,
uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len);
dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len);
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len);
} else { } else {
#endif #endif
dst_buf1[0] = tmp_buf1; dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE;
dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE;
dst_buf2[0] = tmp_buf2; dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE;
dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE;
dst_buf3[0] = tmp_buf3; dst_buf3[0] = tmp_buf3;
dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE;
dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
} }
#endif #endif
@@ -1900,39 +1901,37 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
if (mbmi->obmc) { if (mbmi->obmc) {
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else #else
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
assert(mbmi->sb_type >= BLOCK_8X8); assert(mbmi->sb_type >= BLOCK_8X8);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
CU_SIZE * CU_SIZE * 2 * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
CU_SIZE * CU_SIZE * 2 * len);
} else { } else {
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1; dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
dst_buf2[0] = tmp_buf2; dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -3281,7 +3280,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
#if CONFIG_ANS #if CONFIG_ANS
&td->token_ans, &td->token_ans,
#endif // CONFIG_ANS #endif // CONFIG_ANS
BLOCK_64X64, 4); BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
} }
pbi->mb.corrupted |= td->xd.corrupted; pbi->mb.corrupted |= td->xd.corrupted;
if (pbi->mb.corrupted) if (pbi->mb.corrupted)
@@ -3396,7 +3395,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
#if CONFIG_ANS #if CONFIG_ANS
&tile_data->token_ans, &tile_data->token_ans,
#endif // CONFIG_ANS #endif // CONFIG_ANS
BLOCK_64X64, 4); BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
} }
} }
return !tile_data->xd.corrupted; return !tile_data->xd.corrupted;

View File

@@ -39,8 +39,8 @@ typedef struct TileData {
#endif // CONFIG_ANS #endif // CONFIG_ANS
DECLARE_ALIGNED(16, MACROBLOCKD, xd); DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */ /* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
} TileData; } TileData;
typedef struct TileWorkerData { typedef struct TileWorkerData {
@@ -52,8 +52,8 @@ typedef struct TileWorkerData {
FRAME_COUNTS counts; FRAME_COUNTS counts;
DECLARE_ALIGNED(16, MACROBLOCKD, xd); DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */ /* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
struct vpx_internal_error_info error_info; struct vpx_internal_error_info error_info;
} TileWorkerData; } TileWorkerData;

View File

@@ -62,7 +62,7 @@ static int decode_coefs(const MACROBLOCKD *xd,
const vpx_prob *prob; const vpx_prob *prob;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32]; uint8_t token_cache[MAX_TX_SQUARE];
const uint8_t *band_translate = get_band_translate(tx_size); const uint8_t *band_translate = get_band_translate(tx_size);
int dq_shift; int dq_shift;
int v, token; int v, token;
@@ -245,7 +245,7 @@ static int decode_coefs_ans(const MACROBLOCKD *const xd,
const vpx_prob *prob; const vpx_prob *prob;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32]; uint8_t token_cache[MAX_TX_SQUARE];
const uint8_t *band_translate = get_band_translate(tx_size); const uint8_t *band_translate = get_band_translate(tx_size);
int dq_shift; int dq_shift;
int v, token; int v, token;

View File

@@ -116,8 +116,8 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col; const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST];
const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]); const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
int x, y; int x, y;

View File

@@ -415,9 +415,9 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
bl_index = mi_row * cm->mi_cols + mi_col; bl_index = mi_row * cm->mi_cols + mi_col;
// Loop through all 8x8 blocks in superblock and update map. // Loop through all 8x8 blocks in superblock and update map.
xmis = xmis =
VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]); VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]);
ymis = ymis =
VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]); VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]);
for (y = 0; y < ymis; y++) { for (y = 0; y < ymis; y++) {
for (x = 0; x < xmis; x++) { for (x = 0; x < xmis; x++) {
const int bl_index2 = bl_index + y * cm->mi_cols + x; const int bl_index2 = bl_index + y * cm->mi_cols + x;

View File

@@ -32,9 +32,11 @@ static const int segment_id[ENERGY_SPAN] = {0, 1, 1, 2, 3, 4};
#define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] #define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN]
DECLARE_ALIGNED(16, static const uint8_t, vp10_64_zeros[64]) = {0}; DECLARE_ALIGNED(16, static const uint8_t,
vp10_all_zeros[MAX_SB_SIZE]) = {0};
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, static const uint16_t, vp10_highbd_64_zeros[64]) = {0}; DECLARE_ALIGNED(16, static const uint16_t,
vp10_highbd_all_zeros[MAX_SB_SIZE]) = {0};
#endif #endif
unsigned int vp10_vaq_segment_id(int energy) { unsigned int vp10_vaq_segment_id(int energy) {
@@ -153,17 +155,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), 0, bw, bh, CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0, bw, bh,
&sse, &avg); &sse, &avg);
sse >>= 2 * (xd->bd - 8); sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8); avg >>= (xd->bd - 8);
} else { } else {
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_64_zeros, 0, bw, bh, &sse, &avg); vp10_all_zeros, 0, bw, bh, &sse, &avg);
} }
#else #else
aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
vp10_64_zeros, 0, bw, bh, &sse, &avg); vp10_all_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh)); var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh); return (256 * var) / (bw * bh);
@@ -172,17 +174,17 @@ static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride, x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(vp10_highbd_64_zeros), CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros),
0, &sse); 0, &sse);
} else { } else {
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride, x->plane[0].src.stride,
vp10_64_zeros, 0, &sse); vp10_all_zeros, 0, &sse);
} }
#else #else
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
x->plane[0].src.stride, x->plane[0].src.stride,
vp10_64_zeros, 0, &sse); vp10_all_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
return (256 * var) >> num_pels_log2_lookup[bs]; return (256 * var) >> num_pels_log2_lookup[bs];
} }

View File

@@ -1893,7 +1893,7 @@ static void write_modes(VP10_COMP *const cpi,
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0, write_modes_sb_wrapper(cpi, tile, w, ans, tok, tok_end, 0,
mi_row, mi_col, BLOCK_64X64); mi_row, mi_col, BLOCK_LARGEST);
} }
} }
} }

View File

@@ -28,7 +28,7 @@ typedef struct {
} diff; } diff;
typedef struct macroblock_plane { typedef struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
tran_low_t *qcoeff; tran_low_t *qcoeff;
tran_low_t *coeff; tran_low_t *coeff;
uint16_t *eobs; uint16_t *eobs;
@@ -63,10 +63,10 @@ typedef struct {
} MB_MODE_INFO_EXT; } MB_MODE_INFO_EXT;
typedef struct { typedef struct {
uint8_t best_palette_color_map[4096]; uint8_t best_palette_color_map[MAX_SB_SQUARE];
double kmeans_data_buf[2 * 4096]; double kmeans_data_buf[2 * MAX_SB_SQUARE];
uint8_t kmeans_indices_buf[4096]; uint8_t kmeans_indices_buf[MAX_SB_SQUARE];
uint8_t kmeans_pre_indices_buf[4096]; uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE];
} PALETTE_BUFFER; } PALETTE_BUFFER;
typedef struct macroblock MACROBLOCK; typedef struct macroblock MACROBLOCK;
@@ -140,11 +140,11 @@ struct macroblock {
// Notes transform blocks where no coefficents are coded. // Notes transform blocks where no coefficents are coded.
// Set during mode selection. Read during block encoding. // Set during mode selection. Read during block encoding.
uint8_t zcoeff_blk[TX_SIZES][256]; uint8_t zcoeff_blk[TX_SIZES][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
uint8_t blk_skip[MAX_MB_PLANE][256]; uint8_t blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#if CONFIG_REF_MV #if CONFIG_REF_MV
uint8_t blk_skip_drl[MAX_MB_PLANE][256]; uint8_t blk_skip_drl[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#endif #endif
#endif #endif
@@ -164,12 +164,12 @@ struct macroblock {
int quant_fp; int quant_fp;
// skip forward transform and quantization // skip forward transform and quantization
uint8_t skip_txfm[MAX_MB_PLANE][4]; uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB];
#define SKIP_TXFM_NONE 0 #define SKIP_TXFM_NONE 0
#define SKIP_TXFM_AC_DC 1 #define SKIP_TXFM_AC_DC 1
#define SKIP_TXFM_AC_ONLY 2 #define SKIP_TXFM_AC_ONLY 2
int64_t bsse[MAX_MB_PLANE][4]; int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB];
// Used to store sub partition's choices. // Used to store sub partition's choices.
MV pred_mv[MAX_REF_FRAMES]; MV pred_mv[MAX_REF_FRAMES];

View File

@@ -11,11 +11,14 @@
#include "vp10/encoder/context_tree.h" #include "vp10/encoder/context_tree.h"
#include "vp10/encoder/encoder.h" #include "vp10/encoder/encoder.h"
static const BLOCK_SIZE square[] = { static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 2] = {
BLOCK_8X8, BLOCK_8X8,
BLOCK_16X16, BLOCK_16X16,
BLOCK_32X32, BLOCK_32X32,
BLOCK_64X64, BLOCK_64X64,
#if CONFIG_EXT_PARTITION
BLOCK_128X128,
#endif // CONFIG_EXT_PARTITION
}; };
static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
@@ -53,6 +56,14 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
ctx->eobs_pbuf[i][k] = ctx->eobs[i][k]; ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
} }
} }
if (cm->allow_screen_content_tools) {
for (i = 0; i < 2; ++i) {
CHECK_MEM_ERROR(cm, ctx->color_index_map[i],
vpx_memalign(32,
num_pix * sizeof(*ctx->color_index_map[i])));
}
}
} }
static void free_mode_context(PICK_MODE_CONTEXT *ctx) { static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
@@ -177,8 +188,13 @@ static void free_tree_contexts(PC_TREE *tree) {
// represents the state of our search. // represents the state of our search.
void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) { void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
int i, j; int i, j;
#if CONFIG_EXT_PARTITION
const int leaf_nodes = 256;
const int tree_nodes = 256 + 64 + 16 + 4 + 1;
#else
const int leaf_nodes = 64; const int leaf_nodes = 64;
const int tree_nodes = 64 + 16 + 4 + 1; const int tree_nodes = 64 + 16 + 4 + 1;
#endif // CONFIG_EXT_PARTITION
int pc_tree_index = 0; int pc_tree_index = 0;
PC_TREE *this_pc; PC_TREE *this_pc;
PICK_MODE_CONTEXT *this_leaf; PICK_MODE_CONTEXT *this_leaf;
@@ -217,7 +233,7 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
// Each node has 4 leaf nodes, fill each block_size level of the tree // Each node has 4 leaf nodes, fill each block_size level of the tree
// from leafs to the root. // from leafs to the root.
for (nodes = 16; nodes > 0; nodes >>= 2) { for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
for (i = 0; i < nodes; ++i) { for (i = 0; i < nodes; ++i) {
PC_TREE *const tree = &td->pc_tree[pc_tree_index]; PC_TREE *const tree = &td->pc_tree[pc_tree_index];
alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
@@ -233,11 +249,17 @@ void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
} }
void vp10_free_pc_tree(ThreadData *td) { void vp10_free_pc_tree(ThreadData *td) {
#if CONFIG_EXT_PARTITION
const int leaf_nodes = 256;
const int tree_nodes = 256 + 64 + 16 + 4 + 1;
#else
const int leaf_nodes = 64;
const int tree_nodes = 64 + 16 + 4 + 1; const int tree_nodes = 64 + 16 + 4 + 1;
#endif // CONFIG_EXT_PARTITION
int i; int i;
// Set up all 4x4 mode contexts // Set up all 4x4 mode contexts
for (i = 0; i < 64; ++i) for (i = 0; i < leaf_nodes; ++i)
free_mode_context(&td->leaf_tree[i]); free_mode_context(&td->leaf_tree[i]);
// Sets up all the leaf nodes in the tree. // Sets up all the leaf nodes in the tree.

View File

@@ -49,7 +49,6 @@ typedef struct {
// For current partition, only if all Y, U, and V transform blocks' // For current partition, only if all Y, U, and V transform blocks'
// coefficients are quantized to 0, skippable is set to 0. // coefficients are quantized to 0, skippable is set to 0.
int skippable; int skippable;
uint8_t skip_txfm[MAX_MB_PLANE << 2];
int best_mode_index; int best_mode_index;
int hybrid_pred_diff; int hybrid_pred_diff;
int comp_pred_diff; int comp_pred_diff;

View File

@@ -189,7 +189,7 @@ int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride,
static uint8_t *block_start(uint8_t *framebuf, int stride, static uint8_t *block_start(uint8_t *framebuf, int stride,
int mi_row, int mi_col) { int mi_row, int mi_col) {
return framebuf + (stride * mi_row * 8) + (mi_col * 8); return framebuf + (stride * mi_row * MI_SIZE) + (mi_col * MI_SIZE);
} }
static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,

View File

@@ -93,7 +93,16 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
// purposes of activity masking. // purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines, // Eventually this should be replaced by custom no-reference routines,
// which will be faster. // which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = { static const uint8_t VP10_VAR_OFFS[MAX_SB_SIZE] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
#if CONFIG_EXT_PARTITION
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@@ -102,10 +111,20 @@ static const uint8_t VP9_VAR_OFFS[64] = {
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128 128, 128, 128, 128, 128, 128, 128, 128
#endif // CONFIG_EXT_PARTITION
}; };
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { static const uint16_t VP10_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
#if CONFIG_EXT_PARTITION
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@@ -114,9 +133,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128 128, 128, 128, 128, 128, 128, 128, 128
#endif // CONFIG_EXT_PARTITION
}; };
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { static const uint16_t VP10_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
#if CONFIG_EXT_PARTITION
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
@@ -125,9 +154,19 @@ static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4
#endif // CONFIG_EXT_PARTITION
}; };
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { static const uint16_t VP10_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
#if CONFIG_EXT_PARTITION
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
@@ -136,6 +175,7 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16
#endif // CONFIG_EXT_PARTITION
}; };
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -144,7 +184,7 @@ unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi,
BLOCK_SIZE bs) { BLOCK_SIZE bs) {
unsigned int sse; unsigned int sse;
const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
VP9_VAR_OFFS, 0, &sse); VP10_VAR_OFFS, 0, &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
} }
@@ -155,18 +195,18 @@ unsigned int vp10_high_get_sby_perpixel_variance(
switch (bd) { switch (bd) {
case 10: case 10:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10),
0, &sse); 0, &sse);
break; break;
case 12: case 12:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12),
0, &sse); 0, &sse);
break; break;
case 8: case 8:
default: default:
var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8),
0, &sse); 0, &sse);
break; break;
} }
@@ -406,6 +446,13 @@ typedef struct {
v32x32 split[4]; v32x32 split[4];
} v64x64; } v64x64;
#if CONFIG_EXT_PARTITION
typedef struct {
partition_variance part_variances;
v64x64 split[4];
} v128x128;
#endif // CONFIG_EXT_PARTITION
typedef struct { typedef struct {
partition_variance *part_variances; partition_variance *part_variances;
var *split[4]; var *split[4];
@@ -415,12 +462,24 @@ typedef enum {
V16X16, V16X16,
V32X32, V32X32,
V64X64, V64X64,
#if CONFIG_EXT_PARTITION
V128X128,
#endif // CONFIG_EXT_PARTITION
} TREE_LEVEL; } TREE_LEVEL;
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
int i; int i;
node->part_variances = NULL; node->part_variances = NULL;
switch (bsize) { switch (bsize) {
#if CONFIG_EXT_PARTITION
case BLOCK_128X128: {
v128x128 *vt = (v128x128 *) data;
node->part_variances = &vt->part_variances;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].part_variances.none;
break;
}
#endif // CONFIG_EXT_PARTITION
case BLOCK_64X64: { case BLOCK_64X64: {
v64x64 *vt = (v64x64 *) data; v64x64 *vt = (v64x64 *) data;
node->part_variances = &vt->part_variances; node->part_variances = &vt->part_variances;
@@ -770,7 +829,8 @@ static int choose_partitioning(VP10_COMP *cpi,
const uint8_t *d; const uint8_t *d;
int sp; int sp;
int dp; int dp;
int pixels_wide = 64, pixels_high = 64; int pixels_wide = 8 * num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
int pixels_high = 8 * num_8x8_blocks_high_lookup[BLOCK_LARGEST];
int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
@@ -781,10 +841,11 @@ static int choose_partitioning(VP10_COMP *cpi,
int variance4x4downsample[16]; int variance4x4downsample[16];
int segment_id = CR_SEGMENT_ID_BASE; int segment_id = CR_SEGMENT_ID_BASE;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map :
cm->last_frame_seg_map; cm->last_frame_seg_map;
segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col);
if (cyclic_refresh_segment_id_boosted(segment_id)) { if (cyclic_refresh_segment_id_boosted(segment_id)) {
int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex); int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
@@ -792,11 +853,12 @@ static int choose_partitioning(VP10_COMP *cpi,
} }
} }
#if CONFIG_EXT_PARTITION_TYPES #if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
assert(0); printf("Not yet implemented: choose_partitioning\n");
#endif exit(-1);
#endif // CONFIG_EXT_PARTITION
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_LARGEST);
if (xd->mb_to_right_edge < 0) if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3); pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -813,8 +875,20 @@ static int choose_partitioning(VP10_COMP *cpi,
const YV12_BUFFER_CONFIG *yv12_g = NULL; const YV12_BUFFER_CONFIG *yv12_g = NULL;
unsigned int y_sad, y_sad_g; unsigned int y_sad, y_sad_g;
const BLOCK_SIZE bsize = BLOCK_32X32
+ (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); const int max_mi_block_size = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols;
const int is_left_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows;
BLOCK_SIZE bsize;
if (is_right_edge && is_left_edge)
bsize = get_subsize(BLOCK_LARGEST, PARTITION_SPLIT);
else if (is_right_edge)
bsize = get_subsize(BLOCK_LARGEST, PARTITION_VERT);
else if (is_left_edge)
bsize = get_subsize(BLOCK_LARGEST, PARTITION_HORZ);
else
bsize = BLOCK_LARGEST;
assert(yv12 != NULL); assert(yv12 != NULL);
yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
@@ -834,7 +908,7 @@ static int choose_partitioning(VP10_COMP *cpi,
&cm->frame_refs[LAST_FRAME - 1].sf); &cm->frame_refs[LAST_FRAME - 1].sf);
mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[0] = LAST_FRAME;
mbmi->ref_frame[1] = NONE; mbmi->ref_frame[1] = NONE;
mbmi->sb_type = BLOCK_64X64; mbmi->sb_type = BLOCK_LARGEST;
mbmi->mv[0].as_int = 0; mbmi->mv[0].as_int = 0;
mbmi->interp_filter = BILINEAR; mbmi->interp_filter = BILINEAR;
@@ -849,7 +923,7 @@ static int choose_partitioning(VP10_COMP *cpi,
x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv; x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
} }
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_LARGEST);
for (i = 1; i <= 2; ++i) { for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i]; struct macroblock_plane *p = &x->plane[i];
@@ -868,33 +942,29 @@ static int choose_partitioning(VP10_COMP *cpi,
d = xd->plane[0].dst.buf; d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride; dp = xd->plane[0].dst.stride;
// If the y_sad is very small, take 64x64 as partition and exit. // If the y_sad is very small, take the largest partition and exit.
// Don't check on boosted segment for now, as 64x64 is suppressed there. // Don't check on boosted segment for now, as largest is suppressed there.
if (segment_id == CR_SEGMENT_ID_BASE && if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
y_sad < cpi->vbp_threshold_sad) { if (!is_right_edge && !is_left_edge) {
const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_LARGEST);
const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows) {
set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
return 0; return 0;
} }
} }
} else { } else {
d = VP9_VAR_OFFS; d = VP10_VAR_OFFS;
dp = 0; dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (xd->bd) { switch (xd->bd) {
case 10: case 10:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10);
break; break;
case 12: case 12:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12);
break; break;
case 8: case 8:
default: default:
d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8);
break; break;
} }
} }
@@ -1699,15 +1769,6 @@ static void rd_pick_sb_modes(VP10_COMP *cpi,
p[i].eobs = ctx->eobs_pbuf[i][0]; p[i].eobs = ctx->eobs_pbuf[i][0];
} }
if (cm->current_video_frame == 0 && cm->allow_screen_content_tools) {
for (i = 0; i < 2; ++i) {
if (ctx->color_index_map[i] == 0) {
CHECK_MEM_ERROR(cm, ctx->color_index_map[i],
vpx_memalign(16, (ctx->num_4x4_blk << 4) *
sizeof(*ctx->color_index_map[i])));
}
}
}
for (i = 0; i < 2; ++i) for (i = 0; i < 2; ++i)
pd[i].color_index_map = ctx->color_index_map[i]; pd[i].color_index_map = ctx->color_index_map[i];
@@ -2084,17 +2145,16 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td
} }
} }
typedef struct { typedef struct {
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE]; ENTROPY_CONTEXT a[2 * MI_BLOCK_SIZE * MAX_MB_PLANE];
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE]; ENTROPY_CONTEXT l[2 * MI_BLOCK_SIZE * MAX_MB_PLANE];
PARTITION_CONTEXT sa[8]; PARTITION_CONTEXT sa[MI_BLOCK_SIZE];
PARTITION_CONTEXT sl[8]; PARTITION_CONTEXT sl[MI_BLOCK_SIZE];
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
TXFM_CONTEXT *p_ta; TXFM_CONTEXT *p_ta;
TXFM_CONTEXT *p_tl; TXFM_CONTEXT *p_tl;
TXFM_CONTEXT ta[8]; TXFM_CONTEXT ta[MI_BLOCK_SIZE];
TXFM_CONTEXT tl[8]; TXFM_CONTEXT tl[MI_BLOCK_SIZE];
#endif #endif
} RD_SEARCH_MACROBLOCK_CONTEXT; } RD_SEARCH_MACROBLOCK_CONTEXT;
@@ -2892,11 +2952,11 @@ static void rd_use_partition(VP10_COMP *cpi,
// We must have chosen a partitioning and encoding or we'll fail later on. // We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success. // No other opportunities for success.
if (bsize == BLOCK_64X64) if (bsize == BLOCK_LARGEST)
assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
if (do_recon) { if (do_recon) {
int output_enabled = (bsize == BLOCK_64X64); int output_enabled = (bsize == BLOCK_LARGEST);
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
pc_tree); pc_tree);
} }
@@ -2909,21 +2969,38 @@ static void rd_use_partition(VP10_COMP *cpi,
} }
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x4
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x8, 8x4, 8x8
BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 8x16, 16x8, 16x16
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32
BLOCK_16X16 BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
}; };
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_8X8, // 4x4
BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8
BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16
BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32
BLOCK_64X64 BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
}; };
// Next square block size less or equal than current block size.
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
BLOCK_4X4, // 4x4
BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x8, 8x4, 8x8
BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 8x16, 16x8, 16x16
BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32
BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
};
// Look at all the mode_info entries for blocks that are part of this // Look at all the mode_info entries for blocks that are part of this
// partition and find the min and max values for sb_type. // partition and find the min and max values for sb_type.
@@ -2954,15 +3031,6 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
} }
} }
// Next square block size less or equal than current block size.
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
BLOCK_32X32, BLOCK_32X32, BLOCK_32X32,
BLOCK_64X64
};
// Look at neighboring blocks and set a min and max partition size based on // Look at neighboring blocks and set a min and max partition size based on
// what they chose. // what they chose.
static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile, static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile,
@@ -2978,13 +3046,13 @@ static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile,
const int col8x8_remaining = tile->mi_col_end - mi_col; const int col8x8_remaining = tile->mi_col_end - mi_col;
int bh, bw; int bh, bw;
BLOCK_SIZE min_size = BLOCK_4X4; BLOCK_SIZE min_size = BLOCK_4X4;
BLOCK_SIZE max_size = BLOCK_64X64; BLOCK_SIZE max_size = BLOCK_LARGEST;
int bs_hist[BLOCK_SIZES] = {0}; int bs_hist[BLOCK_SIZES] = {0};
// Trap case where we do not have a prediction. // Trap case where we do not have a prediction.
if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
// Default "min to max" and "max to min" // Default "min to max" and "max to min"
min_size = BLOCK_64X64; min_size = BLOCK_LARGEST;
max_size = BLOCK_4X4; max_size = BLOCK_4X4;
// NOTE: each call to get_sb_partition_size_range() uses the previous // NOTE: each call to get_sb_partition_size_range() uses the previous
@@ -3054,7 +3122,7 @@ static void set_partition_range(VP10_COMMON *cm, MACROBLOCKD *xd,
MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
BLOCK_SIZE bs, min_size, max_size; BLOCK_SIZE bs, min_size, max_size;
min_size = BLOCK_64X64; min_size = BLOCK_LARGEST;
max_size = BLOCK_4X4; max_size = BLOCK_4X4;
if (prev_mi) { if (prev_mi) {
@@ -3104,16 +3172,27 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
} }
#if CONFIG_FP_MB_STATS #if CONFIG_FP_MB_STATS
const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4};
const int num_16x16_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4};
const int qindex_skip_threshold_lookup[BLOCK_SIZES] = const int qindex_skip_threshold_lookup[BLOCK_SIZES] =
{0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120}; {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
130, 130, 150
#endif // CONFIG_EXT_PARTITION
};
const int qindex_split_threshold_lookup[BLOCK_SIZES] = const int qindex_split_threshold_lookup[BLOCK_SIZES] =
{0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120}; {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
160, 160, 240
#endif // CONFIG_EXT_PARTITION
};
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = const int complexity_16x16_blocks_threshold[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6}; {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
8, 8, 10
#endif // CONFIG_EXT_PARTITION
};
typedef enum { typedef enum {
MV_ZERO = 0, MV_ZERO = 0,
@@ -3526,8 +3605,8 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_NONE; pc_tree->partitioning = PARTITION_NONE;
// Adjust dist breakout threshold according to the partition size. // Adjust dist breakout threshold according to the partition size.
dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + dist_breakout_thr >>= (2 * (MAX_SB_SIZE_LOG2 - 2))
b_height_log2_lookup[bsize]); - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
rate_breakout_thr *= num_pels_log2_lookup[bsize]; rate_breakout_thr *= num_pels_log2_lookup[bsize];
@@ -4124,12 +4203,12 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td,
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) { pc_tree->index != 3) {
int output_enabled = (bsize == BLOCK_64X64); int output_enabled = (bsize == BLOCK_LARGEST);
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
bsize, pc_tree); bsize, pc_tree);
} }
if (bsize == BLOCK_64X64) { if (bsize == BLOCK_LARGEST) {
assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip)); assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
assert(best_rdc.rate < INT_MAX); assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX); assert(best_rdc.dist < INT64_MAX);
@@ -4149,6 +4228,11 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf; SPEED_FEATURES *const sf = &cpi->sf;
int mi_col; int mi_col;
#if CONFIG_EXT_PARTITION
const int leaf_nodes = 256;
#else
const int leaf_nodes = 64;
#endif // CONFIG_EXT_PARTITION
// Initialize the left context for the new SB row // Initialize the left context for the new SB row
vp10_zero_left_context(xd); vp10_zero_left_context(xd);
@@ -4170,10 +4254,10 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
MODE_INFO **mi = cm->mi_grid_visible + idx_str; MODE_INFO **mi = cm->mi_grid_visible + idx_str;
if (sf->adaptive_pred_interp_filter) { if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i) for (i = 0; i < leaf_nodes; ++i)
td->leaf_tree[i].pred_interp_filter = SWITCHABLE; td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
for (i = 0; i < 64; ++i) { for (i = 0; i < leaf_nodes; ++i) {
td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
@@ -4187,29 +4271,29 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
if (seg->enabled) { if (seg->enabled) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map; : cm->last_frame_seg_map;
int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); int segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col);
seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
} }
x->source_variance = UINT_MAX; x->source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
const BLOCK_SIZE bsize = const BLOCK_SIZE bsize =
seg_skip ? BLOCK_64X64 : sf->always_this_block_size; seg_skip ? BLOCK_LARGEST : sf->always_this_block_size;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rate, &dummy_dist, BLOCK_LARGEST, &dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
&dummy_rate_nocoef, &dummy_rate_nocoef,
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
1, td->pc_root); 1, td->pc_root);
} else if (cpi->partition_search_skippable_frame) { } else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize; BLOCK_SIZE bsize;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rate, &dummy_dist, BLOCK_LARGEST, &dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
&dummy_rate_nocoef, &dummy_rate_nocoef,
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
@@ -4218,7 +4302,7 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
cm->frame_type != KEY_FRAME) { cm->frame_type != KEY_FRAME) {
choose_partitioning(cpi, tile_info, x, mi_row, mi_col); choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rate, &dummy_dist, BLOCK_LARGEST, &dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
&dummy_rate_nocoef, &dummy_rate_nocoef,
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
@@ -4226,12 +4310,12 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
} else { } else {
// If required set upper and lower partition size limits // If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) { if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
&x->min_partition_size, &x->min_partition_size,
&x->max_partition_size); &x->max_partition_size);
} }
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_LARGEST,
&dummy_rdc, &dummy_rdc,
#if CONFIG_SUPERTX #if CONFIG_SUPERTX
&dummy_rate_nocoef, &dummy_rate_nocoef,
@@ -4930,19 +5014,15 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
#if CONFIG_OBMC #if CONFIG_OBMC
if (mbmi->obmc) { if (mbmi->obmc) {
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
#else #else
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t,
tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
assert(mbmi->sb_type >= BLOCK_8X8); assert(mbmi->sb_type >= BLOCK_8X8);
@@ -4950,21 +5030,19 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR( dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
tmp_buf1 + CU_SIZE * CU_SIZE * 2 * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR( dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
tmp_buf2 + CU_SIZE * CU_SIZE * 2 * len);
} else { } else {
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1; dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
dst_buf2[0] = tmp_buf2; dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -5447,38 +5525,35 @@ static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td,
int i, ctx; int i, ctx;
uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint8_t, tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); int dst_stride1[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
DECLARE_ALIGNED(16, uint8_t, int dst_stride2[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); int dst_stride3[3] = {MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE};
int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN};
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len);
dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len);
dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len);
} else { } else {
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1; dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE;
dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE;
dst_buf2[0] = tmp_buf2; dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE;
dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE;
dst_buf3[0] = tmp_buf3; dst_buf3[0] = tmp_buf3;
dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE;
dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -6037,7 +6112,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
sse_uv = 0; sse_uv = 0;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) { for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
ENTROPY_CONTEXT ctxa[16], ctxl[16]; ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE];
const struct macroblockd_plane *const pd = &xd->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane];
int coeff_ctx = 1; int coeff_ctx = 1;
@@ -6081,7 +6157,8 @@ static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
#endif // CONFIG_EXT_TX #endif // CONFIG_EXT_TX
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
ENTROPY_CONTEXT ctxa[16], ctxl[16]; ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE];
const struct macroblockd_plane *const pd = &xd->plane[0]; const struct macroblockd_plane *const pd = &xd->plane[0];
int coeff_ctx = 1; int coeff_ctx = 1;
#endif // CONFIG_VAR_TX #endif // CONFIG_VAR_TX

View File

@@ -29,8 +29,8 @@
#include "vp10/encoder/tokenize.h" #include "vp10/encoder/tokenize.h"
struct optimize_ctx { struct optimize_ctx {
ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MI_BLOCK_SIZE];
}; };
void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
@@ -96,9 +96,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
struct macroblock_plane *const p = &mb->plane[plane]; struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi); const int ref = is_inter_block(&xd->mi[0]->mbmi);
vp10_token_state tokens[1025][2]; vp10_token_state tokens[MAX_TX_SQUARE+1][2];
unsigned best_index[1025][2]; unsigned best_index[MAX_TX_SQUARE+1][2];
uint8_t token_cache[1024]; uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);

View File

@@ -1955,6 +1955,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
CHECK_MEM_ERROR(cm, x->palette_buffer, CHECK_MEM_ERROR(cm, x->palette_buffer,
vpx_memalign(16, sizeof(*x->palette_buffer))); vpx_memalign(16, sizeof(*x->palette_buffer)));
} }
vp10_free_pc_tree(&cpi->td);
vp10_setup_pc_tree(&cpi->common, &cpi->td);
} }
vp10_reset_segment_features(cm); vp10_reset_segment_features(cm);
@@ -3147,7 +3149,7 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
} }
if (lf->filter_level > 0) { if (lf->filter_level > 0) {
#if CONFIG_VAR_TX #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION
vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
#else #else
if (cpi->num_workers > 1) if (cpi->num_workers > 1)

View File

@@ -312,8 +312,8 @@ typedef struct VP10_COMP {
QUANTS quants; QUANTS quants;
ThreadData td; ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base; MB_MODE_INFO_EXT *mbmi_ext_base;
DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width
DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width
VP10_COMMON common; VP10_COMMON common;
VP10EncoderConfig oxcf; VP10EncoderConfig oxcf;
struct lookahead_ctx *lookahead; struct lookahead_ctx *lookahead;

View File

@@ -366,13 +366,13 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (second_pred != NULL) { if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride); y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
sse1); sse1);
} else { } else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} }
@@ -384,7 +384,7 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
#else #else
(void) xd; (void) xd;
if (second_pred != NULL) { if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else { } else {
@@ -694,7 +694,7 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
unsigned int besterr; unsigned int besterr;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
if (second_pred != NULL) if (second_pred != NULL)
vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y, vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
y_stride); y_stride);
@@ -704,9 +704,9 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride,
sse); sse);
} else { } else {
DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
#else #else
DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
(void) xd; (void) xd;
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
if (second_pred != NULL) if (second_pred != NULL)
@@ -1961,10 +1961,10 @@ unsigned int vp10_int_pro_motion_estimation(const VP10_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
DECLARE_ALIGNED(16, int16_t, hbuf[128]); DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]);
DECLARE_ALIGNED(16, int16_t, vbuf[128]); DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]);
DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
int idx; int idx;
const int bw = 4 << b_width_log2_lookup[bsize]; const int bw = 4 << b_width_log2_lookup[bsize];
const int bh = 4 << b_height_log2_lookup[bsize]; const int bh = 4 << b_height_log2_lookup[bsize];

View File

@@ -41,7 +41,7 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
int64_t filt_err; int64_t filt_err;
#if CONFIG_VAR_TX #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION
vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
1, partial_frame); 1, partial_frame);
#else #else

View File

@@ -461,7 +461,7 @@ void vp10_init_quantizer(VP10_COMP *cpi) {
cpi->uv_dequant[q][i] = quant; cpi->uv_dequant[q][i] = quant;
} }
for (i = 2; i < 8; i++) { for (i = 2; i < 8; i++) { // 8: SIMD width
quants->y_quant[q][i] = quants->y_quant[q][1]; quants->y_quant[q][i] = quants->y_quant[q][1];
quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
quants->y_round_fp[q][i] = quants->y_round_fp[q][1]; quants->y_round_fp[q][i] = quants->y_round_fp[q][1];

View File

@@ -27,6 +27,7 @@ typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
const scan_order *sc); const scan_order *sc);
typedef struct { typedef struct {
// 0: dc 1: ac 2-8: ac repeated to SIMD width
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);

View File

@@ -62,7 +62,10 @@ void vp10_rd_cost_init(RD_COST *rd_cost) {
// This table is used to correct for block size. // This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc). // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
#if CONFIG_EXT_PARTITION
48, 48, 64
#endif // CONFIG_EXT_PARTITION
}; };
static void fill_mode_costs(VP10_COMP *cpi) { static void fill_mode_costs(VP10_COMP *cpi) {
@@ -560,8 +563,8 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd, const struct macroblockd_plane *pd,
ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE],
ENTROPY_CONTEXT t_left[16]) { ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]) {
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
@@ -935,7 +938,7 @@ void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
int mode; int mode;
for (mode = 0; mode < top_mode; ++mode) { for (mode = 0; mode < top_mode; ++mode) {
const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4); const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64); const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_LARGEST);
BLOCK_SIZE bs; BLOCK_SIZE bs;
for (bs = min_size; bs <= max_size; ++bs) { for (bs = min_size; bs <= max_size; ++bs) {
int *const fact = &factor_buf[bs][mode]; int *const fact = &factor_buf[bs][mode];

View File

@@ -330,8 +330,8 @@ void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame);
void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd, const struct macroblockd_plane *pd,
ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE],
ENTROPY_CONTEXT t_left[16]); ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE]);
void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi); void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi);

View File

@@ -102,8 +102,8 @@ typedef struct {
struct rdcost_block_args { struct rdcost_block_args {
const VP10_COMP *cpi; const VP10_COMP *cpi;
MACROBLOCK *x; MACROBLOCK *x;
ENTROPY_CONTEXT t_above[16]; ENTROPY_CONTEXT t_above[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT t_left[16]; ENTROPY_CONTEXT t_left[2 * MI_BLOCK_SIZE];
int this_rate; int this_rate;
int64_t this_dist; int64_t this_dist;
int64_t this_sse; int64_t this_sse;
@@ -376,8 +376,8 @@ static void get_energy_distribution_fine(const VP10_COMP *cpi,
unsigned int esq[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; unsigned int esq[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int var[16]; unsigned int var[16];
double total = 0; double total = 0;
const int f_index = bsize - 6;
const int f_index = bsize - BLOCK_16X16;
if (f_index < 0) { if (f_index < 0) {
int i, j, index; int i, j, index;
int w_shift = bw == 8 ? 1 : 2; int w_shift = bw == 8 ? 1 : 2;
@@ -890,7 +890,7 @@ static int cost_coeffs(MACROBLOCK *x,
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][is_inter_block(mbmi)]; x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t token_cache[32 * 32]; uint8_t token_cache[MAX_TX_SQUARE];
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
int pt = coeff_ctx; int pt = coeff_ctx;
#else #else
@@ -1045,10 +1045,10 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane,
if (*eob) { if (*eob) {
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, recon16[32 * 32]); // MAX TX_SIZE**2 DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
uint8_t *recon = (uint8_t*)recon16; uint8_t *recon = (uint8_t*)recon16;
#else #else
DECLARE_ALIGNED(16, uint8_t, recon[32 * 32]); // MAX TX_SIZE**2 DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV; const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;
@@ -1064,18 +1064,18 @@ static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
recon = CONVERT_TO_BYTEPTR(recon); recon = CONVERT_TO_BYTEPTR(recon);
inv_txfm_param.bd = xd->bd; inv_txfm_param.bd = xd->bd;
vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE,
NULL, 0, NULL, 0, bs, bs, xd->bd); NULL, 0, NULL, 0, bs, bs, xd->bd);
highbd_inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param); highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
} else } else
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
{ {
vpx_convolve_copy(dst, dst_stride, recon, 32, vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE,
NULL, 0, NULL, 0, bs, bs); NULL, 0, NULL, 0, bs, bs);
inv_txfm_add(dqcoeff, recon, 32, &inv_txfm_param); inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
} }
cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, 32, &tmp); cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp);
} }
*out_dist = (int64_t)tmp * 16; *out_dist = (int64_t)tmp * 16;
@@ -2838,10 +2838,10 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col]; uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
uint8_t *rec_buffer; uint8_t *rec_buffer;
#else #else
DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
@@ -2860,16 +2860,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
// TODO(any): Use dist_block to compute distortion // TODO(any): Use dist_block to compute distortion
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16); rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE,
NULL, 0, NULL, 0, bh, bh, xd->bd); NULL, 0, NULL, 0, bh, bh, xd->bd);
} else { } else {
rec_buffer = (uint8_t *)rec_buffer_alloc_16; rec_buffer = (uint8_t *)rec_buffer16;
vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE,
NULL, 0, NULL, 0, bh, bh); NULL, 0, NULL, 0, bh, bh);
} }
#else #else
vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE,
NULL, 0, NULL, 0, bh, bh); NULL, 0, NULL, 0, bh, bh);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -2904,12 +2904,12 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
inv_txfm_param.bd = xd->bd; inv_txfm_param.bd = xd->bd;
highbd_inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); highbd_inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
} else { } else {
inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
} }
#else // CONFIG_VP9_HIGHBITDEPTH #else // CONFIG_VP9_HIGHBITDEPTH
inv_txfm_add(dqcoeff, rec_buffer, 32, &inv_txfm_param); inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
if ((bh >> 2) + blk_col > max_blocks_wide || if ((bh >> 2) + blk_col > max_blocks_wide ||
@@ -2921,16 +2921,16 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
tmp = 0; tmp = 0;
for (idy = 0; idy < blocks_height; idy += 2) { for (idy = 0; idy < blocks_height; idy += 2) {
for (idx = 0; idx < blocks_width; idx += 2) { for (idx = 0; idx < blocks_width; idx += 2) {
cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, uint8_t *const s = src + 4 * idy * src_stride + 4 * idx;
src_stride, uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx;
rec_buffer + 4 * idy * 32 + 4 * idx, cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
32, &this_dist);
tmp += this_dist; tmp += this_dist;
} }
} }
} else { } else {
uint32_t this_dist; uint32_t this_dist;
cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &this_dist); cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
&this_dist);
tmp = this_dist; tmp = this_dist;
} }
} }
@@ -3125,8 +3125,10 @@ static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
int idx, idy; int idx, idy;
int block = 0; int block = 0;
int step = 1 << (max_txsize_lookup[plane_bsize] * 2); int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
ENTROPY_CONTEXT ctxa[16], ctxl[16]; ENTROPY_CONTEXT ctxa[2 * MI_BLOCK_SIZE];
TXFM_CONTEXT tx_above[8], tx_left[8]; ENTROPY_CONTEXT ctxl[2 * MI_BLOCK_SIZE];
TXFM_CONTEXT tx_above[MI_BLOCK_SIZE];
TXFM_CONTEXT tx_left[MI_BLOCK_SIZE];
int pnrate = 0, pnskip = 1; int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0; int64_t pndist = 0, pnsse = 0;
@@ -3240,7 +3242,7 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
const int is_inter = is_inter_block(mbmi); const int is_inter = is_inter_block(mbmi);
TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE]; TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
TX_SIZE best_tx = TX_SIZES; TX_SIZE best_tx = TX_SIZES;
uint8_t best_blk_skip[256]; uint8_t best_blk_skip[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
int idx, idy; int idx, idy;
int prune = 0; int prune = 0;
@@ -3423,7 +3425,8 @@ static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
int step = 1 << (max_txsize_lookup[plane_bsize] * 2); int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
int pnrate = 0, pnskip = 1; int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0; int64_t pndist = 0, pnsse = 0;
ENTROPY_CONTEXT ta[16], tl[16]; ENTROPY_CONTEXT ta[2 * MI_BLOCK_SIZE];
ENTROPY_CONTEXT tl[2 * MI_BLOCK_SIZE];
vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
@@ -4560,10 +4563,10 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
// Prediction buffer from second frame. // Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
uint8_t *second_pred; uint8_t *second_pred;
#else #else
DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
for (ref = 0; ref < 2; ++ref) { for (ref = 0; ref < 2; ++ref) {
@@ -5733,9 +5736,9 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
step_param = cpi->mv_step_param; step_param = cpi->mv_step_param;
} }
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) {
int boffset = int boffset =
2 * (b_width_log2_lookup[BLOCK_64X64] - 2 * (b_width_log2_lookup[BLOCK_LARGEST] -
VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = VPXMAX(step_param, boffset); step_param = VPXMAX(step_param, boffset);
} }
@@ -6202,16 +6205,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
const int * const intra_mode_cost = const int * const intra_mode_cost =
cpi->mbmode_cost[size_group_lookup[bsize]]; cpi->mbmode_cost[size_group_lookup[bsize]];
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
const int tmp_buf_sz = CU_SIZE * CU_SIZE;
#if CONFIG_REF_MV #if CONFIG_REF_MV
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
#endif #endif
#endif // CONFIG_EXT_INTER #endif // CONFIG_EXT_INTER
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * MAX_SB_SQUARE]);
uint8_t *tmp_buf; uint8_t *tmp_buf;
#else #else
DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_OBMC #if CONFIG_OBMC
@@ -6226,7 +6228,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int best_rate_y, best_rate_uv; int best_rate_y, best_rate_uv;
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
uint8_t best_blk_skip[3][256]; uint8_t best_blk_skip[MAX_MB_PLANE][MI_BLOCK_SIZE * MI_BLOCK_SIZE * 4];
#endif // CONFIG_VAR_TX #endif // CONFIG_VAR_TX
int64_t best_distortion = INT64_MAX; int64_t best_distortion = INT64_MAX;
unsigned int best_pred_var = UINT_MAX; unsigned int best_pred_var = UINT_MAX;
@@ -6241,8 +6243,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int orig_dst_stride[MAX_MB_PLANE]; int orig_dst_stride[MAX_MB_PLANE];
int rs = 0; int rs = 0;
INTERP_FILTER best_filter = SWITCHABLE; INTERP_FILTER best_filter = SWITCHABLE;
uint8_t skip_txfm[MAX_MB_PLANE][4] = {{0}}; uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}};
int64_t bsse[MAX_MB_PLANE][4] = {{0}}; int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}};
int skip_txfm_sb = 0; int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX; int64_t skip_sse_sb = INT64_MAX;
@@ -6569,8 +6571,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
restore_dst_buf(xd, orig_dst, orig_dst_stride); restore_dst_buf(xd, orig_dst, orig_dst_stride);
} else { } else {
for (j = 0; j < MAX_MB_PLANE; j++) { for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
xd->plane[j].dst.stride = 64; xd->plane[j].dst.stride = MAX_SB_SIZE;
} }
} }
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
@@ -6648,15 +6650,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (have_newmv_in_inter_mode(this_mode)) { if (have_newmv_in_inter_mode(this_mode)) {
int_mv tmp_mv[2]; int_mv tmp_mv[2];
int rate_mvs[2], tmp_rate_mv = 0; int rate_mvs[2], tmp_rate_mv = 0;
uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; uint8_t pred0[2 * MAX_SB_SQUARE * 3];
uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; uint8_t pred1[2 * MAX_SB_SQUARE * 3];
uint8_t *preds0[3] = {pred0, uint8_t *preds0[3] = {pred0,
pred0 + 2 * CU_SIZE * CU_SIZE, pred0 + 2 * MAX_SB_SQUARE,
pred0 + 4 * CU_SIZE * CU_SIZE}; pred0 + 4 * MAX_SB_SQUARE};
uint8_t *preds1[3] = {pred1, uint8_t *preds1[3] = {pred1,
pred1 + 2 * CU_SIZE * CU_SIZE, pred1 + 2 * MAX_SB_SQUARE,
pred1 + 4 * CU_SIZE * CU_SIZE}; pred1 + 4 * MAX_SB_SQUARE};
int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
vp10_build_inter_predictors_for_planes_single_buf( vp10_build_inter_predictors_for_planes_single_buf(
xd, bsize, mi_row, mi_col, 0, preds0, strides); xd, bsize, mi_row, mi_col, 0, preds0, strides);
vp10_build_inter_predictors_for_planes_single_buf( vp10_build_inter_predictors_for_planes_single_buf(
@@ -6723,15 +6725,15 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
mbmi->mv[1].as_int = cur_mv[1].as_int; mbmi->mv[1].as_int = cur_mv[1].as_int;
} }
} else { } else {
uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; uint8_t pred0[2 * MAX_SB_SQUARE * 3];
uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; uint8_t pred1[2 * MAX_SB_SQUARE * 3];
uint8_t *preds0[3] = {pred0, uint8_t *preds0[3] = {pred0,
pred0 + 2 * CU_SIZE * CU_SIZE, pred0 + 2 * MAX_SB_SQUARE,
pred0 + 4 * CU_SIZE * CU_SIZE}; pred0 + 4 * MAX_SB_SQUARE};
uint8_t *preds1[3] = {pred1, uint8_t *preds1[3] = {pred1,
pred1 + 2 * CU_SIZE * CU_SIZE, pred1 + 2 * MAX_SB_SQUARE,
pred1 + 4 * CU_SIZE * CU_SIZE}; pred1 + 4 * MAX_SB_SQUARE};
int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
vp10_build_inter_predictors_for_planes_single_buf( vp10_build_inter_predictors_for_planes_single_buf(
xd, bsize, mi_row, mi_col, 0, preds0, strides); xd, bsize, mi_row, mi_col, 0, preds0, strides);
vp10_build_inter_predictors_for_planes_single_buf( vp10_build_inter_predictors_for_planes_single_buf(
@@ -6791,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
int tmp_rate_mv = 0; int tmp_rate_mv = 0;
mbmi->ref_frame[1] = NONE; mbmi->ref_frame[1] = NONE;
for (j = 0; j < MAX_MB_PLANE; j++) { for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz; xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
xd->plane[j].dst.stride = CU_SIZE; xd->plane[j].dst.stride = MAX_SB_SIZE;
} }
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
restore_dst_buf(xd, orig_dst, orig_dst_stride); restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -6805,11 +6807,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
rmode = intra_mode_cost[mbmi->interintra_mode]; rmode = intra_mode_cost[mbmi->interintra_mode];
vp10_build_interintra_predictors(xd, vp10_build_interintra_predictors(xd,
tmp_buf, tmp_buf,
tmp_buf + tmp_buf_sz, tmp_buf + MAX_SB_SQUARE,
tmp_buf + 2 * tmp_buf_sz, tmp_buf + 2 * MAX_SB_SQUARE,
CU_SIZE, MAX_SB_SIZE,
CU_SIZE, MAX_SB_SIZE,
CU_SIZE, MAX_SB_SIZE,
bsize); bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb); &skip_txfm_sb, &skip_sse_sb);
@@ -6830,11 +6832,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (wedge_bits) { if (wedge_bits) {
vp10_build_interintra_predictors(xd, vp10_build_interintra_predictors(xd,
tmp_buf, tmp_buf,
tmp_buf + tmp_buf_sz, tmp_buf + MAX_SB_SQUARE,
tmp_buf + 2 * tmp_buf_sz, tmp_buf + 2 * MAX_SB_SQUARE,
CU_SIZE, MAX_SB_SIZE,
CU_SIZE, MAX_SB_SIZE,
CU_SIZE, MAX_SB_SIZE,
bsize); bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb); &skip_txfm_sb, &skip_sse_sb);
@@ -6852,11 +6854,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
mbmi->interintra_uv_wedge_index = wedge_index; mbmi->interintra_uv_wedge_index = wedge_index;
vp10_build_interintra_predictors(xd, vp10_build_interintra_predictors(xd,
tmp_buf, tmp_buf,
tmp_buf + tmp_buf_sz, tmp_buf + MAX_SB_SQUARE,
tmp_buf + 2 * tmp_buf_sz, tmp_buf + 2 * MAX_SB_SQUARE,
CU_SIZE, MAX_SB_SIZE,
CU_SIZE, MAX_SB_SIZE,
CU_SIZE, MAX_SB_SIZE,
bsize); bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
&skip_txfm_sb, &skip_sse_sb); &skip_txfm_sb, &skip_sse_sb);
@@ -6937,8 +6939,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
if (best_needs_copy) { if (best_needs_copy) {
// again temporarily set the buffers to local memory to prevent a memcpy // again temporarily set the buffers to local memory to prevent a memcpy
for (i = 0; i < MAX_MB_PLANE; i++) { for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE;
xd->plane[i].dst.stride = 64; xd->plane[i].dst.stride = MAX_SB_SIZE;
} }
} }
rd = tmp_rd; rd = tmp_rd;
@@ -7572,33 +7574,33 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
const MODE_INFO *left_mi = xd->left_mi; const MODE_INFO *left_mi = xd->left_mi;
#if CONFIG_OBMC #if CONFIG_OBMC
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else #else
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_buf1[3], *dst_buf2[3]; uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[3] = {64, 64, 64}; int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[3] = {64, 64, 64}; int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len);
dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len);
} else { } else {
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1; dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + 4096; dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + 8192; dst_buf1[2] = tmp_buf1 + 2 * MAX_SB_SQUARE;
dst_buf2[0] = tmp_buf2; dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + 4096; dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + 8192; dst_buf2[2] = tmp_buf2 + 2 * MAX_SB_SQUARE;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
} }
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -9386,7 +9388,7 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
int switchable_filter_index; int switchable_filter_index;
int_mv *second_ref = comp_pred ? int_mv *second_ref = comp_pred ?
&x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
b_mode_info tmp_best_bmodes[16]; b_mode_info tmp_best_bmodes[16]; // Should this be 4 ?
MB_MODE_INFO tmp_best_mbmode; MB_MODE_INFO tmp_best_mbmode;
BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
int pred_exists = 0; int pred_exists = 0;

View File

@@ -328,13 +328,13 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride + mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
tile_info.mi_col_start; tile_info.mi_col_start;
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
mi_row += 8, mi_ptr += 8 * cm->mi_stride) { mi_row += MI_BLOCK_SIZE, mi_ptr += MI_BLOCK_SIZE * cm->mi_stride) {
MODE_INFO **mi = mi_ptr; MODE_INFO **mi = mi_ptr;
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += 8, mi += 8) { mi_col += MI_BLOCK_SIZE, mi += MI_BLOCK_SIZE) {
count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts, count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, temporal_predictor_count, t_unpred_seg_counts,
mi_row, mi_col, BLOCK_64X64); mi_row, mi_col, BLOCK_LARGEST);
} }
} }
} }

View File

@@ -353,6 +353,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
#if CONFIG_EXT_PARTITION
sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST;
#endif // CONFIG_EXT_PARTITION
sf->max_intra_bsize = BLOCK_32X32; sf->max_intra_bsize = BLOCK_32X32;
sf->allow_skip_recode = 1; sf->allow_skip_recode = 1;
} }
@@ -372,6 +377,11 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
#if CONFIG_EXT_PARTITION
sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEW_ZERO;
#endif // CONFIG_EXT_PARTITION
sf->adaptive_rd_thresh = 2; sf->adaptive_rd_thresh = 2;
// This feature is only enabled when partition search is disabled. // This feature is only enabled when partition search is disabled.
sf->reuse_inter_pred_sby = 1; sf->reuse_inter_pred_sby = 1;
@@ -483,7 +493,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->use_square_partition_only = 0; sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE; sf->auto_min_max_partition_size = NOT_IN_USE;
sf->rd_auto_partition_min_limit = BLOCK_4X4; sf->rd_auto_partition_min_limit = BLOCK_4X4;
sf->default_max_partition_size = BLOCK_64X64; sf->default_max_partition_size = BLOCK_LARGEST;
sf->default_min_partition_size = BLOCK_4X4; sf->default_min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0; sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4; sf->last_partitioning_redo_frequency = 4;
@@ -514,7 +524,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->schedule_mode_search = 0; sf->schedule_mode_search = 0;
for (i = 0; i < BLOCK_SIZES; ++i) for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL; sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64; sf->max_intra_bsize = BLOCK_LARGEST;
sf->reuse_inter_pred_sby = 0; sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set // This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION. // to FIXED_PARTITION.
@@ -541,6 +551,12 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
else if (oxcf->mode == GOOD) else if (oxcf->mode == GOOD)
set_good_speed_feature(cpi, cm, sf, oxcf->speed); set_good_speed_feature(cpi, cm, sf, oxcf->speed);
// sf->partition_search_breakout_dist_thr is set assuming max 64x64
// blocks. Normalise this if the blocks are bigger.
if (MAX_SB_SIZE_LOG2 > 6) {
sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
}
cpi->full_search_sad = vp10_full_search_sad; cpi->full_search_sad = vp10_full_search_sad;
cpi->diamond_search_sad = vp10_diamond_search_sad; cpi->diamond_search_sad = vp10_diamond_search_sad;

View File

@@ -438,7 +438,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
MACROBLOCK *const x = &td->mb; MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
TOKENEXTRA **tp = args->tp; TOKENEXTRA **tp = args->tp;
uint8_t token_cache[32 * 32]; uint8_t token_cache[MAX_TX_SQUARE];
struct macroblock_plane *p = &x->plane[plane]; struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

View File

@@ -361,9 +361,7 @@ int vp10_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
avg, avg_stride, avg, avg_stride,
increase_denoising, increase_denoising,
bs, motion_magnitude, 8); bs, motion_magnitude, 8);
} else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 || } else if (bs < BLOCK_SIZES) {
bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 ||
bs == BLOCK_64X32 || bs == BLOCK_64X64) {
return vp10_denoiser_NxM_sse2_big(sig, sig_stride, return vp10_denoiser_NxM_sse2_big(sig, sig_stride,
mc_avg, mc_avg_stride, mc_avg, mc_avg_stride,
avg, avg_stride, avg, avg_stride,

View File

@@ -130,20 +130,20 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position. // --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint8_t temp[MAX_EXT_SIZE * MAX_CU_SIZE]; uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
int intermediate_height = int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_CU_SIZE); assert(w <= MAX_SB_SIZE);
assert(h <= MAX_CU_SIZE); assert(h <= MAX_SB_SIZE);
assert(y_step_q4 <= 32); assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32); assert(x_step_q4 <= 32);
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
temp, MAX_CU_SIZE, temp, MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4, w, intermediate_height); x_filters, x0_q4, x_step_q4, w, intermediate_height);
convolve_vert(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE, convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
dst, dst_stride, dst, dst_stride,
y_filters, y0_q4, y_step_q4, w, h); y_filters, y0_q4, y_step_q4, w, h);
} }
@@ -240,13 +240,13 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4, const int16_t *filter_y, int y_step_q4,
int w, int h) { int w, int h) {
/* Fixed size intermediate buffer places limits on parameters. */ /* Fixed size intermediate buffer places limits on parameters. */
DECLARE_ALIGNED(16, uint8_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
assert(w <= MAX_CU_SIZE); assert(w <= MAX_SB_SIZE);
assert(h <= MAX_CU_SIZE); assert(h <= MAX_SB_SIZE);
vpx_convolve8_c(src, src_stride, temp, MAX_CU_SIZE, vpx_convolve8_c(src, src_stride, temp, MAX_SB_SIZE,
filter_x, x_step_q4, filter_y, y_step_q4, w, h); filter_x, x_step_q4, filter_y, y_step_q4, w, h);
vpx_convolve_avg_c(temp, MAX_CU_SIZE, dst, dst_stride, vpx_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride,
NULL, 0, NULL, 0, w, h); NULL, 0, NULL, 0, w, h);
} }
@@ -463,21 +463,21 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Must round-up because block may be located at sub-pixel position. // --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint16_t temp[MAX_EXT_SIZE * MAX_CU_SIZE]; uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
int intermediate_height = int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_CU_SIZE); assert(w <= MAX_SB_SIZE);
assert(h <= MAX_CU_SIZE); assert(h <= MAX_SB_SIZE);
assert(y_step_q4 <= 32); assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32); assert(x_step_q4 <= 32);
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4, w, x_filters, x0_q4, x_step_q4, w,
intermediate_height, bd); intermediate_height, bd);
highbd_convolve_vert( highbd_convolve_vert(
CONVERT_TO_BYTEPTR(temp) + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_CU_SIZE, CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
dst, dst_stride, dst, dst_stride,
y_filters, y0_q4, y_step_q4, w, h, bd); y_filters, y0_q4, y_step_q4, w, h, bd);
} }
@@ -561,14 +561,14 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4, const int16_t *filter_y, int y_step_q4,
int w, int h, int bd) { int w, int h, int bd) {
// Fixed size intermediate buffer places limits on parameters. // Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED(16, uint16_t, temp[MAX_CU_SIZE * MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
assert(w <= MAX_CU_SIZE); assert(w <= MAX_SB_SIZE);
assert(h <= MAX_CU_SIZE); assert(h <= MAX_SB_SIZE);
vpx_highbd_convolve8_c(src, src_stride, vpx_highbd_convolve8_c(src, src_stride,
CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_CU_SIZE, vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
dst, dst_stride, dst, dst_stride,
NULL, 0, NULL, 0, w, h, bd); NULL, 0, NULL, 0, w, h, bd);
} }

View File

@@ -13,18 +13,19 @@
#include "./vpx_config.h" #include "./vpx_config.h"
#include "vpx/vpx_integer.h" #include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#if CONFIG_VP10 && CONFIG_EXT_PARTITION #ifndef MAX_SB_SIZE
# define MAX_CU_SIZE 128 # if CONFIG_VP10 && CONFIG_EXT_PARTITION
#else # define MAX_SB_SIZE 128
# define MAX_CU_SIZE 64 # else
#endif // CONFIG_VP10 && CONFIG_EXT_PARTITION # define MAX_SB_SIZE 64
# endif // CONFIG_VP10 && CONFIG_EXT_PARTITION
#endif // ndef MAX_SB_SIZE
#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) #define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) #define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))

View File

@@ -99,27 +99,27 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
int w, int h) { \ int w, int h) { \
assert(filter_x[3] != 128); \ assert(filter_x[3] != 128); \
assert(filter_y[3] != 128); \ assert(filter_y[3] != 128); \
assert(w <= MAX_CU_SIZE); \ assert(w <= MAX_SB_SIZE); \
assert(h <= MAX_CU_SIZE); \ assert(h <= MAX_SB_SIZE); \
assert(x_step_q4 == 16); \ assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \ assert(y_step_q4 == 16); \
if (filter_x[0] || filter_x[1] || filter_x[2]|| \ if (filter_x[0] || filter_x[1] || filter_x[2]|| \
filter_y[0] || filter_y[1] || filter_y[2]) { \ filter_y[0] || filter_y[1] || filter_y[2]) { \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \ DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
fdata2, MAX_CU_SIZE, \ fdata2, MAX_SB_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \ filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 7); \ w, h + 7); \
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_CU_SIZE, MAX_CU_SIZE, \ vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * MAX_SB_SIZE, MAX_SB_SIZE, \
dst, dst_stride, \ dst, dst_stride, \
filter_x, x_step_q4, filter_y, \ filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \ y_step_q4, w, h); \
} else { \ } else { \
DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \ DECLARE_ALIGNED(16, uint8_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_CU_SIZE, \ vpx_convolve8_horiz_##opt(src, src_stride, fdata2, MAX_SB_SIZE, \
filter_x, x_step_q4, filter_y, y_step_q4, \ filter_x, x_step_q4, filter_y, y_step_q4, \
w, h + 1); \ w, h + 1); \
vpx_convolve8_##avg##vert_##opt(fdata2, MAX_CU_SIZE, dst, dst_stride, \ vpx_convolve8_##avg##vert_##opt(fdata2, MAX_SB_SIZE, dst, dst_stride, \
filter_x, x_step_q4, filter_y, \ filter_x, x_step_q4, filter_y, \
y_step_q4, w, h); \ y_step_q4, w, h); \
} \ } \
@@ -239,38 +239,38 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
const int16_t *filter_x, int x_step_q4, \ const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \ const int16_t *filter_y, int y_step_q4, \
int w, int h, int bd) { \ int w, int h, int bd) { \
assert(w <= MAX_CU_SIZE); \ assert(w <= MAX_SB_SIZE); \
assert(h <= MAX_CU_SIZE); \ assert(h <= MAX_SB_SIZE); \
if (x_step_q4 == 16 && y_step_q4 == 16) { \ if (x_step_q4 == 16 && y_step_q4 == 16) { \
if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+7)]); \ DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+7)]); \
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, \ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, \
src_stride, \ src_stride, \
CONVERT_TO_BYTEPTR(fdata2), \ CONVERT_TO_BYTEPTR(fdata2), \
MAX_CU_SIZE, \ MAX_SB_SIZE, \
filter_x, x_step_q4, \ filter_x, x_step_q4, \
filter_y, y_step_q4, \ filter_y, y_step_q4, \
w, h + 7, bd); \ w, h + 7, bd); \
vpx_highbd_convolve8_##avg##vert_##opt( \ vpx_highbd_convolve8_##avg##vert_##opt( \
CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_CU_SIZE, \ CONVERT_TO_BYTEPTR(fdata2) + 3 * MAX_SB_SIZE, \
MAX_CU_SIZE, \ MAX_SB_SIZE, \
dst, \ dst, \
dst_stride, \ dst_stride, \
filter_x, x_step_q4, \ filter_x, x_step_q4, \
filter_y, y_step_q4, \ filter_y, y_step_q4, \
w, h, bd); \ w, h, bd); \
} else { \ } else { \
DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_CU_SIZE * (MAX_CU_SIZE+1)]); \ DECLARE_ALIGNED(16, uint16_t, fdata2[MAX_SB_SIZE * (MAX_SB_SIZE+1)]); \
vpx_highbd_convolve8_horiz_##opt(src, \ vpx_highbd_convolve8_horiz_##opt(src, \
src_stride, \ src_stride, \
CONVERT_TO_BYTEPTR(fdata2), \ CONVERT_TO_BYTEPTR(fdata2), \
MAX_CU_SIZE, \ MAX_SB_SIZE, \
filter_x, x_step_q4, \ filter_x, x_step_q4, \
filter_y, y_step_q4, \ filter_y, y_step_q4, \
w, h + 1, bd); \ w, h + 1, bd); \
vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), \ vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), \
MAX_CU_SIZE, \ MAX_SB_SIZE, \
dst, \ dst, \
dst_stride, \ dst_stride, \
filter_x, x_step_q4, \ filter_x, x_step_q4, \

View File

@@ -31,6 +31,10 @@ cglobal subtract_block, 7, 7, 8, \
je .case_16 je .case_16
cmp colsd, 32 cmp colsd, 32
je .case_32 je .case_32
%if CONFIG_EXT_PARTITION
cmp colsd, 64
je .case_64
%endif
%macro loop16 6 %macro loop16 6
mova m0, [srcq+%1] mova m0, [srcq+%1]
@@ -55,6 +59,22 @@ cglobal subtract_block, 7, 7, 8, \
mova [diffq+mmsize*1+%6], m1 mova [diffq+mmsize*1+%6], m1
%endmacro %endmacro
%if CONFIG_EXT_PARTITION
mov pred_str, pred_stridemp
.loop_128:
loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize, 8*mmsize, 10*mmsize
loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize
lea diffq, [diffq+diff_strideq*2]
add predq, pred_str
add srcq, src_strideq
sub rowsd, 1
jnz .loop_128
RET
.case_64:
%endif
mov pred_str, pred_stridemp mov pred_str, pred_stridemp
.loop_64: .loop_64:
loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize

View File

@@ -844,12 +844,12 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
// --Require an additional 8 rows for the horiz_w8 transpose tail. // --Require an additional 8 rows for the horiz_w8 transpose tail.
DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_CU_SIZE]); DECLARE_ALIGNED(16, uint8_t, temp[(MAX_EXT_SIZE + 8) * MAX_SB_SIZE]);
const int intermediate_height = const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= MAX_CU_SIZE); assert(w <= MAX_SB_SIZE);
assert(h <= MAX_CU_SIZE); assert(h <= MAX_SB_SIZE);
assert(y_step_q4 <= 32); assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32); assert(x_step_q4 <= 32);
@@ -857,33 +857,33 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),
src_stride, src_stride,
temp, temp,
MAX_CU_SIZE, MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4, x_filters, x0_q4, x_step_q4,
w, intermediate_height); w, intermediate_height);
} else { } else {
scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1),
src_stride, src_stride,
temp, temp,
MAX_CU_SIZE, MAX_SB_SIZE,
x_filters, x0_q4, x_step_q4, x_filters, x0_q4, x_step_q4,
w, intermediate_height); w, intermediate_height);
} }
if (w >= 16) { if (w >= 16) {
scaledconvolve_vert_w16(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), scaledconvolve_vert_w16(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_CU_SIZE, MAX_SB_SIZE,
dst, dst,
dst_stride, dst_stride,
y_filters, y0_q4, y_step_q4, w, h); y_filters, y0_q4, y_step_q4, w, h);
} else if (w == 8) { } else if (w == 8) {
scaledconvolve_vert_w8(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), scaledconvolve_vert_w8(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_CU_SIZE, MAX_SB_SIZE,
dst, dst,
dst_stride, dst_stride,
y_filters, y0_q4, y_step_q4, w, h); y_filters, y0_q4, y_step_q4, w, h);
} else { } else {
scaledconvolve_vert_w4(temp + MAX_CU_SIZE * (SUBPEL_TAPS / 2 - 1), scaledconvolve_vert_w4(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
MAX_CU_SIZE, MAX_SB_SIZE,
dst, dst,
dst_stride, dst_stride,
y_filters, y0_q4, y_step_q4, w, h); y_filters, y0_q4, y_step_q4, w, h);

View File

@@ -21,7 +21,11 @@ extern "C" {
#include "vpx/vpx_integer.h" #include "vpx/vpx_integer.h"
#define VP8BORDERINPIXELS 32 #define VP8BORDERINPIXELS 32
#define VP9INNERBORDERINPIXELS 96 #if CONFIG_EXT_PARTITION
# define VP9INNERBORDERINPIXELS 160
#else
# define VP9INNERBORDERINPIXELS 96
#endif // CONFIG_EXT_PARTITION
#define VP9_INTERP_EXTEND 4 #define VP9_INTERP_EXTEND 4
#define VP9_ENC_BORDER_IN_PIXELS 160 #define VP9_ENC_BORDER_IN_PIXELS 160
#define VP9_DEC_BORDER_IN_PIXELS 160 #define VP9_DEC_BORDER_IN_PIXELS 160