From 5a88271b09f2c9412a13dae2a282b2c82bce7552 Mon Sep 17 00:00:00 2001
From: Jim Bankoski
Date: Thu, 6 Jun 2013 06:07:09 -0700
Subject: [PATCH] don't tokenize & encode tokens for blocks in UMV

This avoids encoding tokens for blocks that are entirely in the UMV
border.  This changes the bitstream.

Change-Id: I32b4df46ac8a990d0c37cee92fd34f8ddd4fb6c9
---
 test/borders_test.cc          |  86 ++++++++++++++++++++++
 test/test-data.sha1           |   3 +-
 test/test.mk                  |   2 +
 vp9/common/vp9_alloccommon.c  |   1 +
 vp9/common/vp9_blockd.h       | 131 ++++++++++++++++++++++++++++++++--
 vp9/common/vp9_mvref_common.c |  54 +++++++++++---
 vp9/decoder/vp9_decodframe.c  |   5 ++
 vp9/decoder/vp9_detokenize.c  |  20 ++++--
 vp9/encoder/vp9_encodeframe.c |  79 +++++++++++---------
 vp9/encoder/vp9_encodemb.c    |   5 +-
 vp9/encoder/vp9_rdopt.c       |  69 +++++++++++++-----
 vp9/encoder/vp9_tokenize.c    |   9 ++-
 12 files changed, 385 insertions(+), 79 deletions(-)
 create mode 100644 test/borders_test.cc

diff --git a/test/borders_test.cc b/test/borders_test.cc
new file mode 100644
index 000000000..8cac4fd09
--- /dev/null
+++ b/test/borders_test.cc
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class BordersTest : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  BordersTest() : EncoderTest(GET_PARAM(0)) {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+
+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, 5);
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+    }
+  }
+};
+
+TEST_P(BordersTest, TestEncodeHighBitrate) {
+  // Validate that this non-multiple-of-64-wide clip encodes and decodes
+  // without a mismatch when passing in a very low max q.  This pushes
+  // the encoder to produce lots of big partitions, which will likely
+  // extend into the border and test the border condition.
+  cfg_.g_lag_in_frames = 25;
+  cfg_.rc_2pass_vbr_minsection_pct = 5;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
+  cfg_.rc_target_bitrate = 2000;
+  cfg_.rc_max_quantizer = 10;
+
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       40);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+TEST_P(BordersTest, TestLowBitrate) {
+  // Validate that this clip encodes and decodes without a mismatch
+  // when passing in a very high min q.  This pushes the encoder to produce
+  // lots of small partitions, which will test the other condition.
+ + cfg_.g_lag_in_frames = 25; + cfg_.rc_2pass_vbr_minsection_pct = 5; + cfg_.rc_2pass_vbr_minsection_pct = 2000; + cfg_.rc_target_bitrate = 200; + cfg_.rc_min_quantizer = 40; + + ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, + 40); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values( + ::libvpx_test::kTwoPassGood)); +} // namespace diff --git a/test/test-data.sha1 b/test/test-data.sha1 index c1b6a834c..98cdda0a2 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -1,4 +1,5 @@ d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv +b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv 5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf 65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf 906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf @@ -120,4 +121,4 @@ f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5 41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5 086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5 d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5 -8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5 \ No newline at end of file +8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5 diff --git a/test/test.mk b/test/test.mk index 0d069d026..1e0b2172e 100644 --- a/test/test.mk +++ b/test/test.mk @@ -22,6 +22,7 @@ LIBVPX_TEST_SRCS-yes += encode_test_driver.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c @@ -92,6 +93,7 @@ endif ## TEST DATA ## LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index b770d0505..bdebb3327 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -146,6 +146,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling // information is exposed at this level mi_cols = mi_cols_aligned_to_sb(oci); + # if CONFIG_ALPHA // TODO(jkoleszar): Why is this * 2? oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 8 * mi_cols, 1); diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index c89470b66..05b4fda1b 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -631,14 +631,14 @@ static INLINE void foreach_transformed_block_in_plane( // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(xd) - : xd->mode_info_context->mbmi.txfm_size; + const TX_SIZE tx_size = + plane ? 
get_uv_tx_size(xd) : xd->mode_info_context->mbmi.txfm_size;
   const int block_size_b = bw + bh;
   const int txfrm_size_b = tx_size * 2;
 
   // subsampled size of the block
-  const int ss_sum = xd->plane[plane].subsampling_x +
-      xd->plane[plane].subsampling_y;
+  const int ss_sum = xd->plane[plane].subsampling_x +
+                     xd->plane[plane].subsampling_y;
   const int ss_block_size = block_size_b - ss_sum;
 
   const int step = 1 << txfrm_size_b;
@@ -647,8 +647,42 @@ static INLINE void foreach_transformed_block_in_plane(
   assert(txfrm_size_b <= block_size_b);
   assert(txfrm_size_b <= ss_block_size);
-  for (i = 0; i < (1 << ss_block_size); i += step) {
-    visit(plane, i, bsize, txfrm_size_b, arg);
+
+  // If mb_to_right_edge or mb_to_bottom_edge is negative, the current block
+  // extends into the UMV border, so skip the sub-blocks that lie wholly
+  // within the UMV.
+  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
+    int r, c;
+    const int sw = bw - xd->plane[plane].subsampling_x;
+    const int sh = bh - xd->plane[plane].subsampling_y;
+    int max_blocks_wide = 1 << sw;
+    int max_blocks_high = 1 << sh;
+
+    // xd->mb_to_right_edge is in 1/8th-pixel units.  This converts it to
+    // units of 4x4 blocks.
+    if (xd->mb_to_right_edge < 0)
+      max_blocks_wide +=
+          (xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
+
+    if (xd->mb_to_bottom_edge < 0)
+      max_blocks_high +=
+          (xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
+
+    i = 0;
+    // Unlike the normal case, here we have to keep track of the row and
+    // column of each block so that we know whether it lies in the
+    // unrestricted motion border.
+    for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
+      for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
+        if (r < max_blocks_high && c < max_blocks_wide)
+          visit(plane, i, bsize, txfrm_size_b, arg);
+        i += step;
+      }
+    }
+  } else {
+    for (i = 0; i < (1 << ss_block_size); i += step) {
+      visit(plane, i, bsize, txfrm_size_b, arg);
+    }
   }
 }
@@ -780,4 +814,89 @@ static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
   *x = (raster_mb & (tx_cols - 1)) << (txwl);
   *y = raster_mb >> tx_cols_lg2 << (txwl);
 }
+
+static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
+                             BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
+  const int bw = plane_block_width(bsize, &xd->plane[plane]);
+  const int bh = plane_block_height(bsize, &xd->plane[plane]);
+  int x, y;
+  txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
+  x = x * 4 - 1;
+  y = y * 4 - 1;
+  // Copy a pixel into the UMV border whenever the current block extends
+  // into it.
+  // TODO(JBB): Should be able to do the full extend in place so we don't have
+  // to do this multiple times. 
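+  // The first branch below extends the above row rightward into the UMV
+  // border; the second extends the left column downward.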
+  if (xd->mb_to_right_edge < 0) {
+    int umv_border_start = bw +
+        (xd->mb_to_right_edge >> (3 + xd->plane[plane].subsampling_x));
+
+    if (x + bw > umv_border_start)
+      vpx_memset(
+          xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
+              + umv_border_start,
+          *(xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
+              + umv_border_start - 1),
+          bw);
+  }
+  if (xd->mb_to_bottom_edge < 0) {
+    int umv_border_start = bh +
+        (xd->mb_to_bottom_edge >> (3 + xd->plane[plane].subsampling_y));
+    int i;
+    uint8_t c = *(xd->plane[plane].dst.buf
+        + (umv_border_start - 1) * xd->plane[plane].dst.stride + x);
+
+    uint8_t *d = xd->plane[plane].dst.buf
+        + umv_border_start * xd->plane[plane].dst.stride + x;
+
+    if (y + bh > umv_border_start)
+      for (i = 0; i < bh; i++, d += xd->plane[plane].dst.stride)
+        *d = c;
+  }
+}
+static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
+                                   int plane, int ss_tx_size, int eob,
+                                   int aoff, int loff, ENTROPY_CONTEXT *A,
+                                   ENTROPY_CONTEXT *L) {
+  const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
+  const int sw = bw - xd->plane[plane].subsampling_x;
+  const int sh = bh - xd->plane[plane].subsampling_y;
+  int mi_blocks_wide = 1 << sw;
+  int mi_blocks_high = 1 << sh;
+  int tx_size_in_blocks = (1 << ss_tx_size);
+  int above_contexts = tx_size_in_blocks;
+  int left_contexts = tx_size_in_blocks;
+  int pt;
+
+  // xd->mb_to_right_edge is in 1/8th-pixel units.  This converts it to
+  // units of 4x4 blocks.
+  if (xd->mb_to_right_edge < 0) {
+    mi_blocks_wide += (xd->mb_to_right_edge
+        >> (5 + xd->plane[plane].subsampling_x));
+  }
+
+  // Do not carry this block's eob into contexts that lie outside the
+  // border; any contexts that fall outside are set to 0.
+  if (above_contexts + aoff > mi_blocks_wide)
+    above_contexts = mi_blocks_wide - aoff;
+
+  if (xd->mb_to_bottom_edge < 0) {
+    mi_blocks_high += (xd->mb_to_bottom_edge
+        >> (5 + xd->plane[plane].subsampling_y));
+  }
+  if (left_contexts + loff > mi_blocks_high) {
+    left_contexts = mi_blocks_high - loff;
+  }
+
+  for (pt = 0; pt < above_contexts; pt++)
+    A[pt] = eob > 0;
+  for (pt = above_contexts; pt < (1 << ss_tx_size); pt++)
+    A[pt] = 0;
+  for (pt = 0; pt < left_contexts; pt++)
+    L[pt] = eob > 0;
+  for (pt = left_contexts; pt < (1 << ss_tx_size); pt++)
+    L[pt] = 0;
+}
+
+
 #endif  // VP9_COMMON_VP9_BLOCKD_H_
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index f79d1c0ab..224151de4 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -154,17 +154,49 @@ void vp9_find_mv_refs_idx(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
   vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
   vpx_memset(candidate_scores, 0, sizeof(candidate_scores));
 
-  if (mbmi->sb_type == BLOCK_SIZE_SB64X64) {
-    mv_ref_search = sb64_mv_ref_search;
-  } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32) {
-    mv_ref_search = sb_mv_ref_search;
-  } else if (mbmi->sb_type >= BLOCK_SIZE_MB16X16) {
-    mv_ref_search = mb_mv_ref_search;
-  } else {
-    mv_ref_search = b_mv_ref_search;
-    if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
-      x_idx = block_idx & 1;
-      y_idx = block_idx >> 1;
+  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
+    int pixels_wide = 4 * b_width_log2(mbmi->sb_type);
+    int pixels_high = 4 * b_height_log2(mbmi->sb_type);
+    int pixels_square = 0;
+
+    if (xd->mb_to_right_edge < 0)
+      pixels_wide += (xd->mb_to_right_edge >> 3);
+
+    if (xd->mb_to_bottom_edge < 0)
+      pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+    if (pixels_wide < pixels_high)
+      pixels_square = 
pixels_wide; + else + pixels_square = pixels_high; + + if (pixels_square == 64) { + mv_ref_search = sb64_mv_ref_search; + } else if (pixels_square == 32) { + mv_ref_search = sb_mv_ref_search; + } else if (pixels_square == 16) { + mv_ref_search = mb_mv_ref_search; + } else { + mv_ref_search = b_mv_ref_search; + if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + x_idx = block_idx & 1; + y_idx = block_idx >> 1; + } + } + } + else { + if (mbmi->sb_type == BLOCK_SIZE_SB64X64) { + mv_ref_search = sb64_mv_ref_search; + } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32) { + mv_ref_search = sb_mv_ref_search; + } else if (mbmi->sb_type >= BLOCK_SIZE_MB16X16) { + mv_ref_search = mb_mv_ref_search; + } else { + mv_ref_search = b_mv_ref_search; + if (mbmi->sb_type < BLOCK_SIZE_SB8X8) { + x_idx = block_idx & 1; + y_idx = block_idx >> 1; + } } } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index a9717222a..92c5c9d1b 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -240,6 +240,7 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, mode = plane == 0? xd->mode_info_context->mbmi.mode: xd->mode_info_context->mbmi.uv_mode; + if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8 && plane == 0) { assert(bsize == BLOCK_SIZE_SB8X8); b_mode = xd->mode_info_context->bmi[raster_block].as_mode.first; @@ -247,6 +248,10 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, b_mode = mode; } + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + extend_for_intra(xd, plane, block, bsize, ss_txfrm_size); + } + plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x; vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode, dst, xd->plane[plane].dst.stride); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index b20807226..c91c4fcf8 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -288,9 +288,6 @@ SKIP_START: if (c < seg_eob) coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]++; - for (pt = 0; pt < (1 << txfm_size); pt++) { - A[pt] = L[pt] = c > 0; - } return c; } @@ -299,7 +296,6 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { return vp9_get_segdata(xd, segment_id, SEG_LVL_SKIP) ? 
0 : eob_max;
 }
-
 struct decode_block_args {
   VP9D_COMP *pbi;
   MACROBLOCKD *xd;
@@ -314,6 +310,7 @@ static void decode_block(int plane, int block,
   const int bw = b_width_log2(bsize);
 
   // find the maximum eob for this transform size, adjusted by segment
+  MACROBLOCKD *xd = arg->xd;
   const int segment_id = arg->xd->mode_info_context->mbmi.segment_id;
   const TX_SIZE ss_tx_size = ss_txfrm_size / 2;
   const int seg_eob = get_eob(arg->xd, segment_id, 16 << ss_txfrm_size);
@@ -322,14 +319,23 @@ static void decode_block(int plane, int block,
   const int mod = bw - ss_tx_size - arg->xd->plane[plane].subsampling_x;
   const int aoff = (off & ((1 << mod) - 1)) << ss_tx_size;
   const int loff = (off >> mod) << ss_tx_size;
-
+  int pt;
+  ENTROPY_CONTEXT *A = arg->xd->plane[plane].above_context + aoff;
+  ENTROPY_CONTEXT *L = arg->xd->plane[plane].left_context + loff;
   const int eob = decode_coefs(arg->pbi, arg->xd, arg->r, block,
                                arg->xd->plane[plane].plane_type, seg_eob,
                                BLOCK_OFFSET(qcoeff_base, block, 16),
                                ss_tx_size, arg->xd->plane[plane].dequant,
-                               arg->xd->plane[plane].above_context + aoff,
-                               arg->xd->plane[plane].left_context + loff);
+                               A,
+                               L);
+  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
+    set_contexts_on_border(xd, bsize, plane, ss_tx_size, eob, aoff, loff, A, L);
+  } else {
+    for (pt = 0; pt < (1 << ss_tx_size); pt++) {
+      A[pt] = L[pt] = eob > 0;
+    }
+  }
   arg->xd->plane[plane].eobs[block] = eob;
   arg->eobtotal[0] += eob;
 }
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 3ced30d56..6efcdd042 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -467,7 +467,9 @@ static void update_state(VP9_COMP *cpi,
     int i, j;
     for (j = 0; j < bh; ++j)
       for (i = 0; i < bw; ++i)
-        xd->mode_info_context[mis * j + i].mbmi = *mbmi;
+        if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + bw > i &&
+            (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + bh > j)
+          xd->mode_info_context[mis * j + i].mbmi = *mbmi;
   }
 
   if (cpi->common.mcomp_filter_type == SWITCHABLE &&
@@ -915,13 +917,16 @@ static void set_block_size(VP9_COMMON *const cm, MODE_INFO *m,
                            BLOCK_SIZE_TYPE bsize, int mis,
                            int mi_row, int mi_col) {
   int row, col;
-  int bsl = b_width_log2(bsize);
+  int bwl = b_width_log2(bsize);
+  int bhl = b_height_log2(bsize);
+  int bsl = (bwl > bhl ? bwl : bhl);
+
   int bs = (1 << bsl) / 2;  //
   MODE_INFO *m2 = m + mi_row * mis + mi_col;
   for (row = 0; row < bs; row++) {
     for (col = 0; col < bs; col++) {
       if (mi_row + row >= cm->mi_rows || mi_col + col >= cm->mi_cols)
-        return;
+        continue;
       m2[row*mis+col].mbmi.sb_type = bsize;
     }
   }
@@ -961,21 +966,6 @@ static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
                     / v->count;
 }
 
-// Fills a 16x16 variance tree node by calling get var8x8 var..
-static void fill_16x16_variance(const unsigned char *s, int sp,
-                                const unsigned char *d, int dp, v16x16 *vt) {
-  unsigned int sse;
-  int sum;
-  vp9_get_sse_sum_8x8(s, sp, d, dp, &sse, &sum);
-  fill_variance(&vt->split[0].none, sse, sum, 64);
-  vp9_get_sse_sum_8x8(s + 8, sp, d + 8, dp, &sse, &sum);
-  fill_variance(&vt->split[1].none, sse, sum, 64);
-  vp9_get_sse_sum_8x8(s + 8 * sp, sp, d + 8 * dp, dp, &sse, &sum);
-  fill_variance(&vt->split[2].none, sse, sum, 64);
-  vp9_get_sse_sum_8x8(s + 8 * sp + 8, sp, d + 8 + 8 * dp, dp, &sse, &sum);
-  fill_variance(&vt->split[3].none, sse, sum, 64);
-}
-
 // Combine 2 variance structures by summing the sum_error, sum_square_error,
 // and counts and then calculating the new variance. 
void sum_2_variances(var *r, var *a, var*b) { @@ -1021,8 +1011,18 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, int sp; const unsigned char * d = xd->plane[0].pre->buf; int dp = xd->plane[0].pre->stride; + int pixels_wide = 64, pixels_high = 64; + + vpx_memset(&vt, 0, sizeof(vt)); set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_SB64X64); + + if (xd->mb_to_right_edge < 0) + pixels_wide += (xd->mb_to_right_edge >> 3); + + if (xd->mb_to_bottom_edge < 0) + pixels_high += (xd->mb_to_bottom_edge >> 3); + s = x->plane[0].src.buf; sp = x->plane[0].src.stride; @@ -1034,6 +1034,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, d = vp9_64x64_zeros; dp = 64; // } + // Fill in the entire tree of 8x8 variances for splits. for (i = 0; i < 4; i++) { const int x32_idx = ((i & 1) << 5); @@ -1041,8 +1042,28 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, for (j = 0; j < 4; j++) { const int x_idx = x32_idx + ((j & 1) << 4); const int y_idx = y32_idx + ((j >> 1) << 4); - fill_16x16_variance(s + y_idx * sp + x_idx, sp, d + y_idx * dp + x_idx, - dp, &vt.split[i].split[j]); + const uint8_t *st = s + y_idx * sp + x_idx; + const uint8_t *dt = d + y_idx * dp + x_idx; + unsigned int sse = 0; + int sum = 0; + v16x16 *vst = &vt.split[i].split[j]; + sse = sum = 0; + if (x_idx < pixels_wide && y_idx < pixels_high) + vp9_get_sse_sum_8x8(st, sp, dt, dp, &sse, &sum); + fill_variance(&vst->split[0].none, sse, sum, 64); + sse = sum = 0; + if (x_idx + 8 < pixels_wide && y_idx < pixels_high) + vp9_get_sse_sum_8x8(st + 8, sp, dt + 8, dp, &sse, &sum); + fill_variance(&vst->split[1].none, sse, sum, 64); + sse = sum = 0; + if (x_idx < pixels_wide && y_idx + 8 < pixels_high) + vp9_get_sse_sum_8x8(st + 8 * sp, sp, dt + 8 * dp, dp, &sse, &sum); + fill_variance(&vst->split[2].none, sse, sum, 64); + sse = sum = 0; + if (x_idx + 8 < pixels_wide && y_idx + 8 < pixels_high) + vp9_get_sse_sum_8x8(st + 8 * sp + 8, sp, dt + 8 + 8 * dp, dp, &sse, + &sum); + fill_variance(&vst->split[3].none, sse, sum, 64); } } // Fill the rest of the variance tree by summing the split partition @@ -1088,8 +1109,10 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, MACROBLOCK * const x = &cpi->mb; MACROBLOCKD *xd = &cpi->mb.e_mbd; const int mis = cm->mode_info_stride; - int bwl, bhl; + int bwl = b_width_log2(m->mbmi.sb_type); + int bhl = b_height_log2(m->mbmi.sb_type); int bsl = b_width_log2(bsize); + int bh = (1 << bhl); int bs = (1 << bsl); int bss = (1 << bsl)/4; int i, pl; @@ -1103,9 +1126,6 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, return; - bwl = b_width_log2(m->mbmi.sb_type); - bhl = b_height_log2(m->mbmi.sb_type); - // parse the partition type if ((bwl == bsl) && (bhl == bsl)) partition = PARTITION_NONE; @@ -1144,7 +1164,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, *(get_sb_index(xd, subsize)) = 0; pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, get_block_context(x, subsize)); - if (mi_row + (bs >> 1) <= cm->mi_rows) { + if (mi_row + (bh >> 1) <= cm->mi_rows) { int rt, dt; update_state(cpi, get_block_context(x, subsize), subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -1404,18 +1424,13 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; mi_col += 8) { int dummy_rate, dummy_dist; - // TODO(JBB): remove the border conditions for 64x64 blocks once its fixed - // without 
this border check choose will fail on the border of every - // non 64x64. - if (cpi->speed < 5 || - mi_col + 8 > cm->cur_tile_mi_col_end || - mi_row + 8 > cm->cur_tile_mi_row_end) { + if (cpi->speed < 5) { rd_pick_partition(cpi, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, &dummy_rate, &dummy_dist); } else { const int idx_str = cm->mode_info_stride * mi_row + mi_col; MODE_INFO *m = cm->mi + idx_str; - // set_partitioning(cpi, m, BLOCK_SIZE_SB8X8); + // set_partitioning(cpi, m, BLOCK_SIZE_SB64X64); choose_partitioning(cpi, cm->mi, mi_row, mi_col); rd_use_partition(cpi, m, tp, mi_row, mi_col, BLOCK_SIZE_SB64X64, &dummy_rate, &dummy_dist); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 389c5d860..b65b2619b 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -615,6 +615,10 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize, TX_TYPE tx_type; int mode, b_mode; + if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) { + extend_for_intra(xd, plane, block, bsize, ss_txfrm_size); + } + mode = plane == 0? mbmi->mode: mbmi->uv_mode; if (plane == 0 && mbmi->sb_type < BLOCK_SIZE_SB8X8 && @@ -684,7 +688,6 @@ void vp9_encode_intra_block_uv(VP9_COMMON *cm, MACROBLOCK *x, MACROBLOCKD* const xd = &x->e_mbd; struct optimize_ctx ctx; struct encode_b_args arg = {cm, x, &ctx}; - foreach_transformed_block_uv(xd, bsize, encode_block_intra, &arg); } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 761bbb6d5..c48f34ad7 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -505,33 +505,48 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { return sum > INT_MAX ? INT_MAX : (int)sum; } -static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x, - int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; +struct rdcost_block_args { + VP9_COMMON *cm; + MACROBLOCK *x; + ENTROPY_CONTEXT t_above[16]; + ENTROPY_CONTEXT t_left[16]; + TX_SIZE tx_size; + int bw; + int bh; + int cost; +}; + +static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct rdcost_block_args* args = arg; + int x_idx, y_idx; + MACROBLOCKD * const xd = &args->x->e_mbd; + + txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx, + &y_idx); + + args->cost += cost_coeffs(args->cm, args->x, plane, block, + xd->plane[plane].plane_type, args->t_above + x_idx, + args->t_left + y_idx, args->tx_size, + args->bw * args->bh); +} + +static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { + MACROBLOCKD * const xd = &x->e_mbd; const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; const int bw = 1 << bwl, bh = 1 << bhl; - ENTROPY_CONTEXT t_above[16], t_left[16]; - int block, cost; + struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 }; - vpx_memcpy(&t_above, xd->plane[plane].above_context, + vpx_memcpy(&args.t_above, xd->plane[plane].above_context, sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(&t_left, xd->plane[plane].left_context, + vpx_memcpy(&args.t_left, xd->plane[plane].left_context, sizeof(ENTROPY_CONTEXT) * bh); - cost = 0; - for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) { - int x_idx, y_idx; + foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args); - txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2, 
-                             &x_idx, &y_idx);
-
-    cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
-                        t_above + x_idx, t_left + y_idx,
-                        tx_size, bw * bh);
-  }
-
-  return cost;
+  return args.cost;
 }
 
 static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
@@ -582,6 +597,7 @@ static void super_block_yrd(VP9_COMP *cpi,
     } else {
       mbmi->txfm_size = TX_4X4;
     }
+    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
     super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                              mbmi->txfm_size);
     return;
@@ -826,6 +842,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t local_txfm_cache[NB_TXFM_MODES];
   MODE_INFO *const mic = xd->mode_info_context;
   const int mis = xd->mode_info_stride;
+
   if (cpi->common.frame_type == KEY_FRAME) {
     const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
     const MB_PREDICTION_MODE L = xd->left_available ?
@@ -2410,6 +2427,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t err4x4 = INT64_MAX;
   int i;
 
+  vpx_memset(&txfm_cache, 0, sizeof(txfm_cache));
   ctx->skip = 0;
   xd->mode_info_context->mbmi.mode = DC_PRED;
   xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME;
@@ -2502,6 +2520,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int_mv seg_mvs[4][MAX_REF_FRAMES];
   union b_mode_info best_bmodes[4];
   PARTITION_INFO best_partition;
+  int bwsl = b_width_log2(bsize);
+  int bws = (1 << bwsl) / 4;  // mode_info step for subsize
+  int bhsl = b_height_log2(bsize);
+  int bhs = (1 << bhsl) / 4;  // mode_info step for subsize
 
   for (i = 0; i < 4; i++) {
     int j;
@@ -2723,6 +2745,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         }
       }
     }
+    // TODO(JBB): This works around the fact that we don't have SAD
+    // functions that work when the block size reads outside the UMV.  We
+    // should fix this either by first making the motion search work on a
+    // representative block in the boundary, and then implementing a
+    // function that computes SADs for blocks inside the border.
+    if (((mi_row + bhs) < cm->mi_rows || (mi_col + bws) < cm->mi_cols) &&
+        this_mode == NEWMV) {
+      continue;
+    }
 
     if (this_mode == I4X4_PRED) {
       int rate;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 3d8390b08..b307c54b0 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -99,6 +99,7 @@ struct tokenize_b_args {
   TX_SIZE tx_size;
   int dry_run;
 };
+
 static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
                        int ss_txfrm_size, void *arg) {
   struct tokenize_b_args* const args = arg;
@@ -233,8 +234,12 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
   } while (c < eob && ++c < seg_eob);
 
   *tp = t;
-  for (pt = 0; pt < (1 << tx_size); pt++) {
-    A[pt] = L[pt] = c > 0;
+  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
+    set_contexts_on_border(xd, bsize, plane, tx_size, c, aoff, loff, A, L);
+  } else {
+    for (pt = 0; pt < (1 << tx_size); pt++) {
+      A[pt] = L[pt] = c > 0;
+    }
   }
 }
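Note: as a minimal standalone sketch (not part of the patch, and using simplified, assumed type and field names rather than the real MACROBLOCKD layout), the border clamping used above by foreach_transformed_block_in_plane and set_contexts_on_border works roughly as follows: mb_to_right_edge and mb_to_bottom_edge are kept in 1/8th-pixel units and go negative when a block overhangs the frame, so shifting them right by 5 plus the plane's subsampling yields the (negative) number of 4x4 block columns or rows that fall outside the visible frame.

#include <assert.h>

/* Simplified stand-ins for the real libvpx structures; names are assumed. */
struct border_info {
  int mb_to_right_edge;   /* distance to the frame's right edge, 1/8th pel  */
  int mb_to_bottom_edge;  /* distance to the frame's bottom edge, 1/8th pel */
  int subsampling_x;      /* plane subsampling: 0 for Y, 1 for U/V (4:2:0)  */
  int subsampling_y;
};

typedef void (*visit_fn)(int block_index, void *arg);

/* Visit the transform blocks of a plane block whose width and height are
 * given as log2 counts of 4x4 blocks (bwl, bhl), skipping any transform
 * block that starts inside the UMV border.  tx_blocks_log2 is the transform
 * size, also as a log2 count of 4x4 blocks (0 = 4x4, 1 = 8x8, ...). */
void visit_visible_tx_blocks(const struct border_info *bi,
                             int bwl, int bhl, int tx_blocks_log2,
                             visit_fn visit, void *arg) {
  const int sw = bwl - bi->subsampling_x;       /* plane width, log2 4x4   */
  const int sh = bhl - bi->subsampling_y;       /* plane height, log2 4x4  */
  const int step = 1 << (2 * tx_blocks_log2);   /* 4x4 blocks per tx block */
  int max_blocks_wide = 1 << sw;
  int max_blocks_high = 1 << sh;
  int r, c, i = 0;

  /* mb_to_*_edge is negative when the block overhangs the frame: >> 3 gives
   * pixels, >> 5 gives 4x4 block columns/rows (plus plane subsampling).    */
  if (bi->mb_to_right_edge < 0)
    max_blocks_wide += bi->mb_to_right_edge >> (5 + bi->subsampling_x);
  if (bi->mb_to_bottom_edge < 0)
    max_blocks_high += bi->mb_to_bottom_edge >> (5 + bi->subsampling_y);
  assert(max_blocks_wide > 0 && max_blocks_high > 0);

  for (r = 0; r < (1 << sh); r += 1 << tx_blocks_log2) {
    for (c = 0; c < (1 << sw); c += 1 << tx_blocks_log2) {
      if (r < max_blocks_high && c < max_blocks_wide)
        visit(i, arg);  /* block starts inside the frame: tokenize/encode   */
      i += step;        /* UMV-only blocks keep their index but are skipped */
    }
  }
}

For example, a 64x64 luma block whose last 12 pixel columns lie beyond the right edge has mb_to_right_edge = -12 * 8 = -96, so max_blocks_wide becomes 16 + (-96 >> 5) = 13 and the three right-most columns of 4x4 blocks are never visited.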