From 0d793ccfb6da3fe6eeeb19e37ad06427b3779b68 Mon Sep 17 00:00:00 2001 From: Johann Date: Fri, 30 Nov 2012 12:25:01 -0800 Subject: [PATCH 01/77] Update ARM for vpx_scale changes Refactor asm_offsets for vpx_scale. Change-Id: I2db0eeb28c8e757bd033c6614a1e5319a1a204a5 --- libs.mk | 2 +- vp8/common/asm_com_offsets.c | 19 --------- .../arm/neon/vp8_vpxyv12_copy_y_neon.asm | 2 +- .../neon/vp8_vpxyv12_copyframe_func_neon.asm | 2 +- .../vp8_vpxyv12_copysrcframe_func_neon.asm | 2 +- .../vp8_vpxyv12_extendframeborders_neon.asm | 2 +- vpx_scale/arm/neon/yv12extend_arm.c | 2 +- vpx_scale/vpx_scale.mk | 3 ++ vpx_scale/vpx_scale_asm_offsets.c | 40 +++++++++++++++++++ 9 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 vpx_scale/vpx_scale_asm_offsets.c diff --git a/libs.mk b/libs.mk index 58151a66d..c54522159 100644 --- a/libs.mk +++ b/libs.mk @@ -80,7 +80,7 @@ ifeq ($(CONFIG_VP8_ENCODER),yes) CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h - CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp88cx_arm.mk + CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h diff --git a/vp8/common/asm_com_offsets.c b/vp8/common/asm_com_offsets.c index ae22b5f6b..7bab90f82 100644 --- a/vp8/common/asm_com_offsets.c +++ b/vp8/common/asm_com_offsets.c @@ -12,7 +12,6 @@ #include "vpx_config.h" #include "vpx/vpx_codec.h" #include "vpx_ports/asm_offsets.h" -#include "vpx_scale/yv12config.h" #include "vp8/common/blockd.h" #if CONFIG_POSTPROC @@ -21,19 +20,6 @@ BEGIN -/* vpx_scale */ -DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); -DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); -DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); -DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); -DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); -DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); -DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); -DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); -DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); -DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); -DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS); - #if CONFIG_POSTPROC /* mfqe.c / filter_by_weight */ DEFINE(MFQE_PRECISION_VAL, MFQE_PRECISION); @@ -58,11 +44,6 @@ ct_assert(B_HD_PRED, B_HD_PRED == 8); ct_assert(B_HU_PRED, B_HU_PRED == 9); #endif -#if HAVE_NEON -/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */ -ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32) -#endif - #if HAVE_SSE2 #if CONFIG_POSTPROC /* vp8_filter_by_weight16x16 and 8x8 */ diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm index 9189641b1..cc1789a82 100644 --- a/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm +++ b/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm @@ -15,7 +15,7 @@ REQUIRE8 PRESERVE8 - INCLUDE asm_com_offsets.asm + INCLUDE vpx_scale_asm_offsets.asm AREA ||.text||, CODE, READONLY, ALIGN=2 diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm 
b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm index e55d076d9..3f17883b4 100644 --- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm +++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm @@ -14,7 +14,7 @@ REQUIRE8 PRESERVE8 - INCLUDE asm_com_offsets.asm + INCLUDE vpx_scale_asm_offsets.asm AREA ||.text||, CODE, READONLY, ALIGN=2 diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm index ec64dbc4e..d452ad255 100644 --- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm +++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm @@ -14,7 +14,7 @@ REQUIRE8 PRESERVE8 - INCLUDE asm_com_offsets.asm + INCLUDE vpx_scale_asm_offsets.asm AREA ||.text||, CODE, READONLY, ALIGN=2 ;Note: This function is used to copy source data in src_buffer[i] at beginning of diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm index ebc4242b2..b2eb9eb0f 100644 --- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm +++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm @@ -14,7 +14,7 @@ REQUIRE8 PRESERVE8 - INCLUDE asm_com_offsets.asm + INCLUDE vpx_scale_asm_offsets.asm AREA ||.text||, CODE, READONLY, ALIGN=2 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c index eabd4951a..4535b8fa1 100644 --- a/vpx_scale/arm/neon/yv12extend_arm.c +++ b/vpx_scale/arm/neon/yv12extend_arm.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "./vpx_rtcd.h" +#include "./vpx_scale_rtcd.h" extern void vp8_yv12_copy_frame_func_neon(struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index 6c93f9f19..5e04a8090 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -17,4 +17,7 @@ SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) +$(eval $(call asm_offsets_template,\ + vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c)) + $(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh)) diff --git a/vpx_scale/vpx_scale_asm_offsets.c b/vpx_scale/vpx_scale_asm_offsets.c new file mode 100644 index 000000000..caa9e80ff --- /dev/null +++ b/vpx_scale/vpx_scale_asm_offsets.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "./vpx_config.h" +#include "vpx/vpx_codec.h" +#include "vpx_ports/asm_offsets.h" +#include "vpx_scale/yv12config.h" + +BEGIN + +/* vpx_scale */ +DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); +DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); +DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); +DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); +DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); +DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); +DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); +DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); +DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); +DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); +DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS); + +END + +/* add asserts for any offset that is not supported by assembly code */ +/* add asserts for any size that is not supported by assembly code */ + +#if HAVE_NEON +/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */ +ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32) +#endif From 34591b54dda4a25f42aa5add71b625b2600f6d6a Mon Sep 17 00:00:00 2001 From: Johann Date: Sun, 2 Dec 2012 14:14:00 -0800 Subject: [PATCH 02/77] Remove ARM optimizations from VP9 Change-Id: I9f0ae635fb9a95c4aa1529c177ccb07e2b76970b --- libs.mk | 1 - vp8/vp8dx.mk | 24 - .../arm/armv6/vp9_bilinearfilter_v6.asm | 237 --- vp9/common/arm/armv6/vp9_copymem16x16_v6.asm | 186 --- vp9/common/arm/armv6/vp9_copymem8x4_v6.asm | 128 -- vp9/common/arm/armv6/vp9_copymem8x8_v6.asm | 128 -- .../arm/armv6/vp9_dc_only_idct_add_v6.asm | 67 - vp9/common/arm/armv6/vp9_filter_v6.asm | 624 -------- vp9/common/arm/armv6/vp9_idct_v6.asm | 345 ----- vp9/common/arm/armv6/vp9_iwalsh_v6.asm | 152 -- vp9/common/arm/armv6/vp9_loopfilter_v6.asm | 1282 ----------------- vp9/common/arm/armv6/vp9_recon_v6.asm | 281 ---- .../arm/armv6/vp9_simpleloopfilter_v6.asm | 286 ---- .../arm/armv6/vp9_sixtappredict8x4_v6.asm | 273 ---- .../neon/vp9_bilinearpredict16x16_neon.asm | 357 ----- .../arm/neon/vp9_bilinearpredict4x4_neon.asm | 130 -- .../arm/neon/vp9_bilinearpredict8x4_neon.asm | 135 -- .../arm/neon/vp9_bilinearpredict8x8_neon.asm | 183 --- .../neon/vp9_buildintrapredictorsmby_neon.asm | 584 -------- vp9/common/arm/neon/vp9_copymem16x16_neon.asm | 59 - vp9/common/arm/neon/vp9_copymem8x4_neon.asm | 34 - vp9/common/arm/neon/vp9_copymem8x8_neon.asm | 43 - .../arm/neon/vp9_dc_only_idct_add_neon.asm | 49 - vp9/common/arm/neon/vp9_iwalsh_neon.asm | 80 - vp9/common/arm/neon/vp9_loopfilter_neon.asm | 397 ----- ...p9_loopfiltersimplehorizontaledge_neon.asm | 117 -- .../vp9_loopfiltersimpleverticaledge_neon.asm | 154 -- vp9/common/arm/neon/vp9_mbloopfilter_neon.asm | 469 ------ vp9/common/arm/neon/vp9_recon16x16mb_neon.asm | 131 -- vp9/common/arm/neon/vp9_recon2b_neon.asm | 54 - vp9/common/arm/neon/vp9_recon4b_neon.asm | 69 - vp9/common/arm/neon/vp9_recon_neon.c | 29 - vp9/common/arm/neon/vp9_reconb_neon.asm | 61 - vp9/common/arm/neon/vp9_save_neon_reg.asm | 36 - .../arm/neon/vp9_shortidct4x4llm_1_neon.asm | 67 - .../arm/neon/vp9_shortidct4x4llm_neon.asm | 122 -- .../arm/neon/vp9_sixtappredict16x16_neon.asm | 490 ------- .../arm/neon/vp9_sixtappredict4x4_neon.asm | 422 ------ .../arm/neon/vp9_sixtappredict8x4_neon.asm | 473 ------ 
.../arm/neon/vp9_sixtappredict8x8_neon.asm | 524 ------- vp9/common/arm/vp9_arm_systemdependent.c | 91 -- vp9/common/arm/vp9_bilinearfilter_arm.c | 108 -- vp9/common/arm/vp9_bilinearfilter_arm.h | 35 - vp9/common/arm/vp9_filter_arm.c | 198 --- vp9/common/arm/vp9_idct_arm.h | 65 - vp9/common/arm/vp9_loopfilter_arm.c | 166 --- vp9/common/arm/vp9_loopfilter_arm.h | 41 - vp9/common/arm/vp9_recon_arm.h | 90 -- vp9/common/arm/vp9_reconintra_arm.c | 62 - vp9/common/arm/vp9_subpixel_arm.h | 89 -- vp9/common/vp9_asm_com_offsets.c | 19 - vp9/common/vp9_loopfilter.h | 8 - vp9/common/vp9_rtcd_defs.sh | 33 +- .../arm/armv6/vp9_dequant_dc_idct_v6.asm | 218 --- vp9/decoder/arm/armv6/vp9_dequant_idct_v6.asm | 196 --- vp9/decoder/arm/armv6/vp9_dequantize_v6.asm | 69 - vp9/decoder/arm/armv6/vp9_idct_blk_v6.c | 137 -- .../arm/neon/vp9_dequant_idct_neon.asm | 129 -- vp9/decoder/arm/neon/vp9_dequantizeb_neon.asm | 34 - vp9/decoder/arm/neon/vp9_idct_blk_neon.c | 113 -- .../arm/neon/vp9_idct_dequant_0_2x_neon.asm | 79 - .../neon/vp9_idct_dequant_dc_0_2x_neon.asm | 69 - .../neon/vp9_idct_dequant_dc_full_2x_neon.asm | 205 --- .../neon/vp9_idct_dequant_full_2x_neon.asm | 197 --- vp9/decoder/arm/vp9_dequantize_arm.c | 44 - vp9/decoder/vp9_onyxd_if.c | 52 - .../arm/armv5te/vp9_boolhuff_armv5te.asm | 286 ---- .../arm/armv5te/vp9_packtokens_armv5.asm | 291 ---- .../armv5te/vp9_packtokens_mbrow_armv5.asm | 327 ----- .../vp9_packtokens_partitions_armv5.asm | 465 ------ .../arm/armv6/vp9_fast_quantize_b_armv6.asm | 223 --- vp9/encoder/arm/armv6/vp9_mse16x16_armv6.asm | 138 -- vp9/encoder/arm/armv6/vp9_sad16x16_armv6.asm | 95 -- .../arm/armv6/vp9_short_fdct4x4_armv6.asm | 262 ---- vp9/encoder/arm/armv6/vp9_subtract_armv6.asm | 264 ---- .../arm/armv6/vp9_variance16x16_armv6.asm | 153 -- .../arm/armv6/vp9_variance8x8_armv6.asm | 101 -- .../vp9_variance_halfpixvar16x16_h_armv6.asm | 181 --- .../vp9_variance_halfpixvar16x16_hv_armv6.asm | 222 --- .../vp9_variance_halfpixvar16x16_v_armv6.asm | 183 --- vp9/encoder/arm/armv6/vp9_walsh_v6.asm | 212 --- .../arm/neon/vp9_fastquantizeb_neon.asm | 261 ---- vp9/encoder/arm/neon/vp9_memcpy_neon.asm | 68 - vp9/encoder/arm/neon/vp9_mse16x16_neon.asm | 116 -- vp9/encoder/arm/neon/vp9_picklpf_arm.c | 48 - vp9/encoder/arm/neon/vp9_sad16_neon.asm | 207 --- vp9/encoder/arm/neon/vp9_sad8_neon.asm | 209 --- vp9/encoder/arm/neon/vp9_shortfdct_neon.asm | 221 --- .../arm/neon/vp9_shortwalsh4x4_neon.asm | 103 -- .../neon/vp9_subpixelvariance16x16_neon.asm | 425 ------ .../neon/vp9_subpixelvariance16x16s_neon.asm | 572 -------- .../arm/neon/vp9_subpixelvariance8x8_neon.asm | 224 --- vp9/encoder/arm/neon/vp9_subtract_neon.asm | 185 --- vp9/encoder/arm/neon/vp9_variance_neon.asm | 276 ---- vp9/encoder/arm/vp9_arm_csystemdependent.c | 129 -- vp9/encoder/arm/vp9_boolhuff_arm.c | 33 - vp9/encoder/arm/vp9_dct_arm.c | 21 - vp9/encoder/arm/vp9_dct_arm.h | 65 - vp9/encoder/arm/vp9_encodemb_arm.h | 64 - vp9/encoder/arm/vp9_quantize_arm.c | 57 - vp9/encoder/arm/vp9_quantize_arm.h | 52 - vp9/encoder/arm/vp9_variance_arm.c | 112 -- vp9/encoder/arm/vp9_variance_arm.h | 132 -- vp9/encoder/vp9_asm_enc_offsets.c | 8 - vp9/encoder/vp9_onyx_if.c | 68 - vp9/encoder/vp9_picklpf.c | 76 +- vp9/encoder/vp9_quantize.h | 4 - vp9/vp9_common.mk | 54 - vp9/vp9cx.mk | 4 - vp9/vp9cx_arm.mk | 63 - vp9/vp9dx.mk | 28 - vp9/vp9dx_arm.mk | 29 - 112 files changed, 15 insertions(+), 19132 deletions(-) delete mode 100644 vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_copymem16x16_v6.asm delete 
mode 100644 vp9/common/arm/armv6/vp9_copymem8x4_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_copymem8x8_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_dc_only_idct_add_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_filter_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_idct_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_iwalsh_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_loopfilter_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_recon_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_simpleloopfilter_v6.asm delete mode 100644 vp9/common/arm/armv6/vp9_sixtappredict8x4_v6.asm delete mode 100644 vp9/common/arm/neon/vp9_bilinearpredict16x16_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_bilinearpredict4x4_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_bilinearpredict8x4_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_bilinearpredict8x8_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_buildintrapredictorsmby_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_copymem16x16_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_copymem8x4_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_copymem8x8_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_iwalsh_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_loopfilter_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_loopfiltersimplehorizontaledge_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_loopfiltersimpleverticaledge_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_mbloopfilter_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_recon16x16mb_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_recon2b_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_recon4b_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_recon_neon.c delete mode 100644 vp9/common/arm/neon/vp9_reconb_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_save_neon_reg.asm delete mode 100644 vp9/common/arm/neon/vp9_shortidct4x4llm_1_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_shortidct4x4llm_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_sixtappredict16x16_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_sixtappredict4x4_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_sixtappredict8x4_neon.asm delete mode 100644 vp9/common/arm/neon/vp9_sixtappredict8x8_neon.asm delete mode 100644 vp9/common/arm/vp9_arm_systemdependent.c delete mode 100644 vp9/common/arm/vp9_bilinearfilter_arm.c delete mode 100644 vp9/common/arm/vp9_bilinearfilter_arm.h delete mode 100644 vp9/common/arm/vp9_filter_arm.c delete mode 100644 vp9/common/arm/vp9_idct_arm.h delete mode 100644 vp9/common/arm/vp9_loopfilter_arm.c delete mode 100644 vp9/common/arm/vp9_loopfilter_arm.h delete mode 100644 vp9/common/arm/vp9_recon_arm.h delete mode 100644 vp9/common/arm/vp9_reconintra_arm.c delete mode 100644 vp9/common/arm/vp9_subpixel_arm.h delete mode 100644 vp9/decoder/arm/armv6/vp9_dequant_dc_idct_v6.asm delete mode 100644 vp9/decoder/arm/armv6/vp9_dequant_idct_v6.asm delete mode 100644 vp9/decoder/arm/armv6/vp9_dequantize_v6.asm delete mode 100644 vp9/decoder/arm/armv6/vp9_idct_blk_v6.c delete mode 100644 vp9/decoder/arm/neon/vp9_dequant_idct_neon.asm delete mode 100644 vp9/decoder/arm/neon/vp9_dequantizeb_neon.asm delete mode 100644 vp9/decoder/arm/neon/vp9_idct_blk_neon.c delete mode 100644 vp9/decoder/arm/neon/vp9_idct_dequant_0_2x_neon.asm delete mode 100644 vp9/decoder/arm/neon/vp9_idct_dequant_dc_0_2x_neon.asm delete mode 100644 
vp9/decoder/arm/neon/vp9_idct_dequant_dc_full_2x_neon.asm delete mode 100644 vp9/decoder/arm/neon/vp9_idct_dequant_full_2x_neon.asm delete mode 100644 vp9/decoder/arm/vp9_dequantize_arm.c delete mode 100644 vp9/encoder/arm/armv5te/vp9_boolhuff_armv5te.asm delete mode 100644 vp9/encoder/arm/armv5te/vp9_packtokens_armv5.asm delete mode 100644 vp9/encoder/arm/armv5te/vp9_packtokens_mbrow_armv5.asm delete mode 100644 vp9/encoder/arm/armv5te/vp9_packtokens_partitions_armv5.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_fast_quantize_b_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_mse16x16_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_sad16x16_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_short_fdct4x4_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_subtract_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_variance16x16_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_variance8x8_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_h_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_hv_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_v_armv6.asm delete mode 100644 vp9/encoder/arm/armv6/vp9_walsh_v6.asm delete mode 100644 vp9/encoder/arm/neon/vp9_fastquantizeb_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_memcpy_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_mse16x16_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_picklpf_arm.c delete mode 100644 vp9/encoder/arm/neon/vp9_sad16_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_sad8_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_shortfdct_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_shortwalsh4x4_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_subpixelvariance16x16_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_subpixelvariance16x16s_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_subpixelvariance8x8_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_subtract_neon.asm delete mode 100644 vp9/encoder/arm/neon/vp9_variance_neon.asm delete mode 100644 vp9/encoder/arm/vp9_arm_csystemdependent.c delete mode 100644 vp9/encoder/arm/vp9_boolhuff_arm.c delete mode 100644 vp9/encoder/arm/vp9_dct_arm.c delete mode 100644 vp9/encoder/arm/vp9_dct_arm.h delete mode 100644 vp9/encoder/arm/vp9_encodemb_arm.h delete mode 100644 vp9/encoder/arm/vp9_quantize_arm.c delete mode 100644 vp9/encoder/arm/vp9_quantize_arm.h delete mode 100644 vp9/encoder/arm/vp9_variance_arm.c delete mode 100644 vp9/encoder/arm/vp9_variance_arm.h delete mode 100644 vp9/vp9cx_arm.mk delete mode 100644 vp9/vp9dx_arm.mk diff --git a/libs.mk b/libs.mk index c54522159..bd1cd54e3 100644 --- a/libs.mk +++ b/libs.mk @@ -109,7 +109,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS)) CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h - CODEC_SRCS-$(ARCH_ARM) += $(VP9_PREFIX)vp98cx_arm.mk INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk index 5753e04e5..b5b90d37d 100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@ -18,30 +18,6 @@ VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) VP8_DX_SRCS-yes += vp8_dx_iface.c -# common -#define ARM -#define DISABLE_THREAD - -#INCLUDES += algo/vpx_common/vpx_mem/include -#INCLUDES += common 
-#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += decoder - - - -# decoder -#define ARM -#define DISABLE_THREAD - -#INCLUDES += algo/vpx_common/vpx_mem/include -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += decoder - VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c VP8_DX_SRCS-yes += decoder/dboolhuff.c VP8_DX_SRCS-yes += decoder/decodemv.c diff --git a/vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm b/vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm deleted file mode 100644 index 36e391e2b..000000000 --- a/vp9/common/arm/armv6/vp9_bilinearfilter_v6.asm +++ /dev/null @@ -1,237 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_filter_block2d_bil_first_pass_armv6| - EXPORT |vp9_filter_block2d_bil_second_pass_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code - -;------------------------------------- -; r0 unsigned char *src_ptr, -; r1 unsigned short *dst_ptr, -; r2 unsigned int src_pitch, -; r3 unsigned int height, -; stack unsigned int width, -; stack const short *vp9_filter -;------------------------------------- -; The output is transposed stroed in output array to make it easy for second pass filtering. -|vp9_filter_block2d_bil_first_pass_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #40] ; vp9_filter address - ldr r4, [sp, #36] ; width - - mov r12, r3 ; outer-loop counter - - add r7, r2, r4 ; preload next row - pld [r0, r7] - - sub r2, r2, r4 ; src increment for height loop - - ldr r5, [r11] ; load up filter coefficients - - mov r3, r3, lsl #1 ; height*2 - add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1) - - mov r11, r1 ; save dst_ptr for each row - - cmp r5, #128 ; if filter coef = 128, then skip the filter - beq bil_null_1st_filter - -|bil_height_loop_1st_v6| - ldrb r6, [r0] ; load source data - ldrb r7, [r0, #1] - ldrb r8, [r0, #2] - mov lr, r4, lsr #2 ; 4-in-parellel loop counter - -|bil_width_loop_1st_v6| - ldrb r9, [r0, #3] - ldrb r10, [r0, #4] - - pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0] - pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1] - - smuad r6, r6, r5 ; apply the filter - pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2] - smuad r7, r7, r5 - pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3] - - smuad r8, r8, r5 - smuad r9, r9, r5 - - add r0, r0, #4 - subs lr, lr, #1 - - add r6, r6, #0x40 ; round_shift_and_clamp - add r7, r7, #0x40 - usat r6, #16, r6, asr #7 - usat r7, #16, r7, asr #7 - - strh r6, [r1], r3 ; result is transposed and stored - - add r8, r8, #0x40 ; round_shift_and_clamp - strh r7, [r1], r3 - add r9, r9, #0x40 - usat r8, #16, r8, asr #7 - usat r9, #16, r9, asr #7 - - strh r8, [r1], r3 ; result is transposed and stored - - ldrneb r6, [r0] ; load source data - strh r9, [r1], r3 - - ldrneb r7, [r0, #1] - ldrneb r8, [r0, #2] - - bne bil_width_loop_1st_v6 - - add r0, r0, r2 ; move to next input row - subs r12, r12, #1 - - add r9, r2, r4, lsl #1 ; adding back block width - pld [r0, r9] ; preload next row - - add r11, r11, #2 ; move over to next column - mov r1, r11 - - bne bil_height_loop_1st_v6 - - ldmia sp!, {r4 - r11, pc} - -|bil_null_1st_filter| 
-|bil_height_loop_null_1st| - mov lr, r4, lsr #2 ; loop counter - -|bil_width_loop_null_1st| - ldrb r6, [r0] ; load data - ldrb r7, [r0, #1] - ldrb r8, [r0, #2] - ldrb r9, [r0, #3] - - strh r6, [r1], r3 ; store it to immediate buffer - add r0, r0, #4 - strh r7, [r1], r3 - subs lr, lr, #1 - strh r8, [r1], r3 - strh r9, [r1], r3 - - bne bil_width_loop_null_1st - - subs r12, r12, #1 - add r0, r0, r2 ; move to next input line - add r11, r11, #2 ; move over to next column - mov r1, r11 - - bne bil_height_loop_null_1st - - ldmia sp!, {r4 - r11, pc} - - ENDP ; |vp9_filter_block2d_bil_first_pass_armv6| - - -;--------------------------------- -; r0 unsigned short *src_ptr, -; r1 unsigned char *dst_ptr, -; r2 int dst_pitch, -; r3 unsigned int height, -; stack unsigned int width, -; stack const short *vp9_filter -;--------------------------------- -|vp9_filter_block2d_bil_second_pass_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #40] ; vp9_filter address - ldr r4, [sp, #36] ; width - - ldr r5, [r11] ; load up filter coefficients - mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix - mov r11, r1 - - cmp r5, #128 ; if filter coef = 128, then skip the filter - beq bil_null_2nd_filter - -|bil_height_loop_2nd| - ldr r6, [r0] ; load the data - ldr r8, [r0, #4] - ldrh r10, [r0, #8] - mov lr, r3, lsr #2 ; loop counter - -|bil_width_loop_2nd| - pkhtb r7, r6, r8 ; src[1] | src[2] - pkhtb r9, r8, r10 ; src[3] | src[4] - - smuad r6, r6, r5 ; apply filter - smuad r8, r8, r5 ; apply filter - - subs lr, lr, #1 - - smuadx r7, r7, r5 ; apply filter - smuadx r9, r9, r5 ; apply filter - - add r0, r0, #8 - - add r6, r6, #0x40 ; round_shift_and_clamp - add r7, r7, #0x40 - usat r6, #8, r6, asr #7 - usat r7, #8, r7, asr #7 - strb r6, [r1], r2 ; the result is transposed back and stored - - add r8, r8, #0x40 ; round_shift_and_clamp - strb r7, [r1], r2 - add r9, r9, #0x40 - usat r8, #8, r8, asr #7 - usat r9, #8, r9, asr #7 - strb r8, [r1], r2 ; the result is transposed back and stored - - ldrne r6, [r0] ; load data - strb r9, [r1], r2 - ldrne r8, [r0, #4] - ldrneh r10, [r0, #8] - - bne bil_width_loop_2nd - - subs r12, r12, #1 - add r0, r0, #4 ; update src for next row - add r11, r11, #1 - mov r1, r11 - - bne bil_height_loop_2nd - ldmia sp!, {r4 - r11, pc} - -|bil_null_2nd_filter| -|bil_height_loop_null_2nd| - mov lr, r3, lsr #2 - -|bil_width_loop_null_2nd| - ldr r6, [r0], #4 ; load data - subs lr, lr, #1 - ldr r8, [r0], #4 - - strb r6, [r1], r2 ; store data - mov r7, r6, lsr #16 - strb r7, [r1], r2 - mov r9, r8, lsr #16 - strb r8, [r1], r2 - strb r9, [r1], r2 - - bne bil_width_loop_null_2nd - - subs r12, r12, #1 - add r0, r0, #4 - add r11, r11, #1 - mov r1, r11 - - bne bil_height_loop_null_2nd - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_filter_block2d_second_pass_armv6| - - END diff --git a/vp9/common/arm/armv6/vp9_copymem16x16_v6.asm b/vp9/common/arm/armv6/vp9_copymem16x16_v6.asm deleted file mode 100644 index 44c3c492f..000000000 --- a/vp9/common/arm/armv6/vp9_copymem16x16_v6.asm +++ /dev/null @@ -1,186 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_copy_mem16x16_v6| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp9_copy_mem16x16_v6| PROC - stmdb sp!, {r4 - r7} - ;push {r4-r7} - - ;preload - pld [r0, #31] ; preload for next 16x16 block - - ands r4, r0, #15 - beq copy_mem16x16_fast - - ands r4, r0, #7 - beq copy_mem16x16_8 - - ands r4, r0, #3 - beq copy_mem16x16_4 - - ;copy one byte each time - ldrb r4, [r0] - ldrb r5, [r0, #1] - ldrb r6, [r0, #2] - ldrb r7, [r0, #3] - - mov r12, #16 - -copy_mem16x16_1_loop - strb r4, [r2] - strb r5, [r2, #1] - strb r6, [r2, #2] - strb r7, [r2, #3] - - ldrb r4, [r0, #4] - ldrb r5, [r0, #5] - ldrb r6, [r0, #6] - ldrb r7, [r0, #7] - - subs r12, r12, #1 - - strb r4, [r2, #4] - strb r5, [r2, #5] - strb r6, [r2, #6] - strb r7, [r2, #7] - - ldrb r4, [r0, #8] - ldrb r5, [r0, #9] - ldrb r6, [r0, #10] - ldrb r7, [r0, #11] - - strb r4, [r2, #8] - strb r5, [r2, #9] - strb r6, [r2, #10] - strb r7, [r2, #11] - - ldrb r4, [r0, #12] - ldrb r5, [r0, #13] - ldrb r6, [r0, #14] - ldrb r7, [r0, #15] - - add r0, r0, r1 - - strb r4, [r2, #12] - strb r5, [r2, #13] - strb r6, [r2, #14] - strb r7, [r2, #15] - - add r2, r2, r3 - - ldrneb r4, [r0] - ldrneb r5, [r0, #1] - ldrneb r6, [r0, #2] - ldrneb r7, [r0, #3] - - pld [r0, #31] ; preload for next 16x16 block - - bne copy_mem16x16_1_loop - - ldmia sp!, {r4 - r7} - ;pop {r4-r7} - mov pc, lr - -;copy 4 bytes each time -copy_mem16x16_4 - ldr r4, [r0] - ldr r5, [r0, #4] - ldr r6, [r0, #8] - ldr r7, [r0, #12] - - mov r12, #16 - -copy_mem16x16_4_loop - subs r12, r12, #1 - add r0, r0, r1 - - str r4, [r2] - str r5, [r2, #4] - str r6, [r2, #8] - str r7, [r2, #12] - - add r2, r2, r3 - - ldrne r4, [r0] - ldrne r5, [r0, #4] - ldrne r6, [r0, #8] - ldrne r7, [r0, #12] - - pld [r0, #31] ; preload for next 16x16 block - - bne copy_mem16x16_4_loop - - ldmia sp!, {r4 - r7} - ;pop {r4-r7} - mov pc, lr - -;copy 8 bytes each time -copy_mem16x16_8 - sub r1, r1, #16 - sub r3, r3, #16 - - mov r12, #16 - -copy_mem16x16_8_loop - ldmia r0!, {r4-r5} - ;ldm r0, {r4-r5} - ldmia r0!, {r6-r7} - - add r0, r0, r1 - - stmia r2!, {r4-r5} - subs r12, r12, #1 - ;stm r2, {r4-r5} - stmia r2!, {r6-r7} - - add r2, r2, r3 - - pld [r0, #31] ; preload for next 16x16 block - bne copy_mem16x16_8_loop - - ldmia sp!, {r4 - r7} - ;pop {r4-r7} - mov pc, lr - -;copy 16 bytes each time -copy_mem16x16_fast - ;sub r1, r1, #16 - ;sub r3, r3, #16 - - mov r12, #16 - -copy_mem16x16_fast_loop - ldmia r0, {r4-r7} - ;ldm r0, {r4-r7} - add r0, r0, r1 - - subs r12, r12, #1 - stmia r2, {r4-r7} - ;stm r2, {r4-r7} - add r2, r2, r3 - - pld [r0, #31] ; preload for next 16x16 block - bne copy_mem16x16_fast_loop - - ldmia sp!, {r4 - r7} - ;pop {r4-r7} - mov pc, lr - - ENDP ; |vp9_copy_mem16x16_v6| - - END diff --git a/vp9/common/arm/armv6/vp9_copymem8x4_v6.asm b/vp9/common/arm/armv6/vp9_copymem8x4_v6.asm deleted file mode 100644 index 45b904367..000000000 --- a/vp9/common/arm/armv6/vp9_copymem8x4_v6.asm +++ /dev/null @@ -1,128 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. 
All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_copy_mem8x4_v6| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void vp9_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp9_copy_mem8x4_v6| PROC - ;push {r4-r5} - stmdb sp!, {r4-r5} - - ;preload - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - ands r4, r0, #7 - beq copy_mem8x4_fast - - ands r4, r0, #3 - beq copy_mem8x4_4 - - ;copy 1 byte each time - ldrb r4, [r0] - ldrb r5, [r0, #1] - - mov r12, #4 - -copy_mem8x4_1_loop - strb r4, [r2] - strb r5, [r2, #1] - - ldrb r4, [r0, #2] - ldrb r5, [r0, #3] - - subs r12, r12, #1 - - strb r4, [r2, #2] - strb r5, [r2, #3] - - ldrb r4, [r0, #4] - ldrb r5, [r0, #5] - - strb r4, [r2, #4] - strb r5, [r2, #5] - - ldrb r4, [r0, #6] - ldrb r5, [r0, #7] - - add r0, r0, r1 - - strb r4, [r2, #6] - strb r5, [r2, #7] - - add r2, r2, r3 - - ldrneb r4, [r0] - ldrneb r5, [r0, #1] - - bne copy_mem8x4_1_loop - - ldmia sp!, {r4 - r5} - ;pop {r4-r5} - mov pc, lr - -;copy 4 bytes each time -copy_mem8x4_4 - ldr r4, [r0] - ldr r5, [r0, #4] - - mov r12, #4 - -copy_mem8x4_4_loop - subs r12, r12, #1 - add r0, r0, r1 - - str r4, [r2] - str r5, [r2, #4] - - add r2, r2, r3 - - ldrne r4, [r0] - ldrne r5, [r0, #4] - - bne copy_mem8x4_4_loop - - ldmia sp!, {r4-r5} - ;pop {r4-r5} - mov pc, lr - -;copy 8 bytes each time -copy_mem8x4_fast - ;sub r1, r1, #8 - ;sub r3, r3, #8 - - mov r12, #4 - -copy_mem8x4_fast_loop - ldmia r0, {r4-r5} - ;ldm r0, {r4-r5} - add r0, r0, r1 - - subs r12, r12, #1 - stmia r2, {r4-r5} - ;stm r2, {r4-r5} - add r2, r2, r3 - - bne copy_mem8x4_fast_loop - - ldmia sp!, {r4-r5} - ;pop {r4-r5} - mov pc, lr - - ENDP ; |vp9_copy_mem8x4_v6| - - END diff --git a/vp9/common/arm/armv6/vp9_copymem8x8_v6.asm b/vp9/common/arm/armv6/vp9_copymem8x8_v6.asm deleted file mode 100644 index 0dd971bfe..000000000 --- a/vp9/common/arm/armv6/vp9_copymem8x8_v6.asm +++ /dev/null @@ -1,128 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_copy_mem8x8_v6| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp9_copy_mem8x8_v6| PROC - ;push {r4-r5} - stmdb sp!, {r4-r5} - - ;preload - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - ands r4, r0, #7 - beq copy_mem8x8_fast - - ands r4, r0, #3 - beq copy_mem8x8_4 - - ;copy 1 byte each time - ldrb r4, [r0] - ldrb r5, [r0, #1] - - mov r12, #8 - -copy_mem8x8_1_loop - strb r4, [r2] - strb r5, [r2, #1] - - ldrb r4, [r0, #2] - ldrb r5, [r0, #3] - - subs r12, r12, #1 - - strb r4, [r2, #2] - strb r5, [r2, #3] - - ldrb r4, [r0, #4] - ldrb r5, [r0, #5] - - strb r4, [r2, #4] - strb r5, [r2, #5] - - ldrb r4, [r0, #6] - ldrb r5, [r0, #7] - - add r0, r0, r1 - - strb r4, [r2, #6] - strb r5, [r2, #7] - - add r2, r2, r3 - - ldrneb r4, [r0] - ldrneb r5, [r0, #1] - - bne copy_mem8x8_1_loop - - ldmia sp!, {r4 - r5} - ;pop {r4-r5} - mov pc, lr - -;copy 4 bytes each time -copy_mem8x8_4 - ldr r4, [r0] - ldr r5, [r0, #4] - - mov r12, #8 - -copy_mem8x8_4_loop - subs r12, r12, #1 - add r0, r0, r1 - - str r4, [r2] - str r5, [r2, #4] - - add r2, r2, r3 - - ldrne r4, [r0] - ldrne r5, [r0, #4] - - bne copy_mem8x8_4_loop - - ldmia sp!, {r4 - r5} - ;pop {r4-r5} - mov pc, lr - -;copy 8 bytes each time -copy_mem8x8_fast - ;sub r1, r1, #8 - ;sub r3, r3, #8 - - mov r12, #8 - -copy_mem8x8_fast_loop - ldmia r0, {r4-r5} - ;ldm r0, {r4-r5} - add r0, r0, r1 - - subs r12, r12, #1 - stmia r2, {r4-r5} - ;stm r2, {r4-r5} - add r2, r2, r3 - - bne copy_mem8x8_fast_loop - - ldmia sp!, {r4-r5} - ;pop {r4-r5} - mov pc, lr - - ENDP ; |vp9_copy_mem8x8_v6| - - END diff --git a/vp9/common/arm/armv6/vp9_dc_only_idct_add_v6.asm b/vp9/common/arm/armv6/vp9_dc_only_idct_add_v6.asm deleted file mode 100644 index e0660e9fd..000000000 --- a/vp9/common/arm/armv6/vp9_dc_only_idct_add_v6.asm +++ /dev/null @@ -1,67 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - EXPORT |vp8_dc_only_idct_add_v6| - - AREA |.text|, CODE, READONLY - -;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr, -; unsigned char *dst_ptr, int pitch, int stride) -; r0 input_dc -; r1 pred_ptr -; r2 dest_ptr -; r3 pitch -; sp stride - -|vp8_dc_only_idct_add_v6| PROC - stmdb sp!, {r4 - r7, lr} - - add r0, r0, #4 ; input_dc += 4 - ldr r12, c0x0000FFFF - ldr r4, [r1], r3 - ldr r6, [r1], r3 - and r0, r12, r0, asr #3 ; input_dc >> 3 + mask - ldr lr, [sp, #20] - orr r0, r0, r0, lsl #16 ; a1 | a1 - - uxtab16 r5, r0, r4 ; a1+2 | a1+0 - uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1 - uxtab16 r7, r0, r6 - uxtab16 r6, r0, r6, ror #8 - usat16 r5, #8, r5 - usat16 r4, #8, r4 - usat16 r7, #8, r7 - usat16 r6, #8, r6 - orr r5, r5, r4, lsl #8 - orr r7, r7, r6, lsl #8 - ldr r4, [r1], r3 - ldr r6, [r1] - str r5, [r2], lr - str r7, [r2], lr - - uxtab16 r5, r0, r4 - uxtab16 r4, r0, r4, ror #8 - uxtab16 r7, r0, r6 - uxtab16 r6, r0, r6, ror #8 - usat16 r5, #8, r5 - usat16 r4, #8, r4 - usat16 r7, #8, r7 - usat16 r6, #8, r6 - orr r5, r5, r4, lsl #8 - orr r7, r7, r6, lsl #8 - str r5, [r2], lr - str r7, [r2] - - ldmia sp!, {r4 - r7, pc} - - ENDP ; |vp8_dc_only_idct_add_v6| - -; Constant Pool -c0x0000FFFF DCD 0x0000FFFF - END diff --git a/vp9/common/arm/armv6/vp9_filter_v6.asm b/vp9/common/arm/armv6/vp9_filter_v6.asm deleted file mode 100644 index 16b321e37..000000000 --- a/vp9/common/arm/armv6/vp9_filter_v6.asm +++ /dev/null @@ -1,624 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_filter_block2d_first_pass_armv6| - EXPORT |vp9_filter_block2d_first_pass_16x16_armv6| - EXPORT |vp9_filter_block2d_first_pass_8x8_armv6| - EXPORT |vp9_filter_block2d_second_pass_armv6| - EXPORT |vp9_filter4_block2d_second_pass_armv6| - EXPORT |vp9_filter_block2d_first_pass_only_armv6| - EXPORT |vp9_filter_block2d_second_pass_only_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code -;------------------------------------- -; r0 unsigned char *src_ptr -; r1 short *output_ptr -; r2 unsigned int src_pixels_per_line -; r3 unsigned int output_width -; stack unsigned int output_height -; stack const short *vp9_filter -;------------------------------------- -; vp9_filter the input and put in the output array. Apply the 6 tap FIR filter with -; the output being a 2 byte value and the intput being a 1 byte value. 
-|vp9_filter_block2d_first_pass_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #40] ; vp9_filter address - ldr r7, [sp, #36] ; output height - - sub r2, r2, r3 ; inside loop increments input array, - ; so the height loop only needs to add - ; r2 - width to the input pointer - - mov r3, r3, lsl #1 ; multiply width by 2 because using shorts - add r12, r3, #16 ; square off the output - sub sp, sp, #4 - - ldr r4, [r11] ; load up packed filter coefficients - ldr r5, [r11, #4] - ldr r6, [r11, #8] - - str r1, [sp] ; push destination to stack - mov r7, r7, lsl #16 ; height is top part of counter - -; six tap filter -|height_loop_1st_6| - ldrb r8, [r0, #-2] ; load source data - ldrb r9, [r0, #-1] - ldrb r10, [r0], #2 - orr r7, r7, r3, lsr #2 ; construct loop counter - -|width_loop_1st_6| - ldrb r11, [r0, #-1] - - pkhbt lr, r8, r9, lsl #16 ; r9 | r8 - pkhbt r8, r9, r10, lsl #16 ; r10 | r9 - - ldrb r9, [r0] - - smuad lr, lr, r4 ; apply the filter - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - smuad r8, r8, r4 - pkhbt r11, r11, r9, lsl #16 ; r9 | r11 - - smlad lr, r10, r5, lr - ldrb r10, [r0, #1] - smlad r8, r11, r5, r8 - ldrb r11, [r0, #2] - - sub r7, r7, #1 - - pkhbt r9, r9, r10, lsl #16 ; r10 | r9 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - - smlad lr, r9, r6, lr - smlad r11, r10, r6, r8 - - ands r10, r7, #0xff ; test loop counter - - add lr, lr, #0x40 ; round_shift_and_clamp - ldrneb r8, [r0, #-2] ; load data for next loop - usat lr, #8, lr, asr #7 - add r11, r11, #0x40 - ldrneb r9, [r0, #-1] - usat r11, #8, r11, asr #7 - - strh lr, [r1], r12 ; result is transposed and stored, which - ; will make second pass filtering easier. - ldrneb r10, [r0], #2 - strh r11, [r1], r12 - - bne width_loop_1st_6 - - ldr r1, [sp] ; load and update dst address - subs r7, r7, #0x10000 - add r0, r0, r2 ; move to next input line - - add r1, r1, #2 ; move over to next column - str r1, [sp] - - bne height_loop_1st_6 - - add sp, sp, #4 - ldmia sp!, {r4 - r11, pc} - - ENDP - -; -------------------------- -; 16x16 version -; ----------------------------- -|vp9_filter_block2d_first_pass_16x16_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #40] ; vp9_filter address - ldr r7, [sp, #36] ; output height - - add r4, r2, #18 ; preload next low - pld [r0, r4] - - sub r2, r2, r3 ; inside loop increments input array, - ; so the height loop only needs to add - ; r2 - width to the input pointer - - mov r3, r3, lsl #1 ; multiply width by 2 because using shorts - add r12, r3, #16 ; square off the output - sub sp, sp, #4 - - ldr r4, [r11] ; load up packed filter coefficients - ldr r5, [r11, #4] - ldr r6, [r11, #8] - - str r1, [sp] ; push destination to stack - mov r7, r7, lsl #16 ; height is top part of counter - -; six tap filter -|height_loop_1st_16_6| - ldrb r8, [r0, #-2] ; load source data - ldrb r9, [r0, #-1] - ldrb r10, [r0], #2 - orr r7, r7, r3, lsr #2 ; construct loop counter - -|width_loop_1st_16_6| - ldrb r11, [r0, #-1] - - pkhbt lr, r8, r9, lsl #16 ; r9 | r8 - pkhbt r8, r9, r10, lsl #16 ; r10 | r9 - - ldrb r9, [r0] - - smuad lr, lr, r4 ; apply the filter - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - smuad r8, r8, r4 - pkhbt r11, r11, r9, lsl #16 ; r9 | r11 - - smlad lr, r10, r5, lr - ldrb r10, [r0, #1] - smlad r8, r11, r5, r8 - ldrb r11, [r0, #2] - - sub r7, r7, #1 - - pkhbt r9, r9, r10, lsl #16 ; r10 | r9 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - - smlad lr, r9, r6, lr - smlad r11, r10, r6, r8 - - ands r10, r7, #0xff ; test loop counter - - add lr, lr, #0x40 ; round_shift_and_clamp - ldrneb r8, [r0, #-2] 
; load data for next loop - usat lr, #8, lr, asr #7 - add r11, r11, #0x40 - ldrneb r9, [r0, #-1] - usat r11, #8, r11, asr #7 - - strh lr, [r1], r12 ; result is transposed and stored, which - ; will make second pass filtering easier. - ldrneb r10, [r0], #2 - strh r11, [r1], r12 - - bne width_loop_1st_16_6 - - ldr r1, [sp] ; load and update dst address - subs r7, r7, #0x10000 - add r0, r0, r2 ; move to next input line - - add r11, r2, #34 ; adding back block width(=16) - pld [r0, r11] ; preload next low - - add r1, r1, #2 ; move over to next column - str r1, [sp] - - bne height_loop_1st_16_6 - - add sp, sp, #4 - ldmia sp!, {r4 - r11, pc} - - ENDP - -; -------------------------- -; 8x8 version -; ----------------------------- -|vp9_filter_block2d_first_pass_8x8_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #40] ; vp9_filter address - ldr r7, [sp, #36] ; output height - - add r4, r2, #10 ; preload next low - pld [r0, r4] - - sub r2, r2, r3 ; inside loop increments input array, - ; so the height loop only needs to add - ; r2 - width to the input pointer - - mov r3, r3, lsl #1 ; multiply width by 2 because using shorts - add r12, r3, #16 ; square off the output - sub sp, sp, #4 - - ldr r4, [r11] ; load up packed filter coefficients - ldr r5, [r11, #4] - ldr r6, [r11, #8] - - str r1, [sp] ; push destination to stack - mov r7, r7, lsl #16 ; height is top part of counter - -; six tap filter -|height_loop_1st_8_6| - ldrb r8, [r0, #-2] ; load source data - ldrb r9, [r0, #-1] - ldrb r10, [r0], #2 - orr r7, r7, r3, lsr #2 ; construct loop counter - -|width_loop_1st_8_6| - ldrb r11, [r0, #-1] - - pkhbt lr, r8, r9, lsl #16 ; r9 | r8 - pkhbt r8, r9, r10, lsl #16 ; r10 | r9 - - ldrb r9, [r0] - - smuad lr, lr, r4 ; apply the filter - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - smuad r8, r8, r4 - pkhbt r11, r11, r9, lsl #16 ; r9 | r11 - - smlad lr, r10, r5, lr - ldrb r10, [r0, #1] - smlad r8, r11, r5, r8 - ldrb r11, [r0, #2] - - sub r7, r7, #1 - - pkhbt r9, r9, r10, lsl #16 ; r10 | r9 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - - smlad lr, r9, r6, lr - smlad r11, r10, r6, r8 - - ands r10, r7, #0xff ; test loop counter - - add lr, lr, #0x40 ; round_shift_and_clamp - ldrneb r8, [r0, #-2] ; load data for next loop - usat lr, #8, lr, asr #7 - add r11, r11, #0x40 - ldrneb r9, [r0, #-1] - usat r11, #8, r11, asr #7 - - strh lr, [r1], r12 ; result is transposed and stored, which - ; will make second pass filtering easier. 
- ldrneb r10, [r0], #2 - strh r11, [r1], r12 - - bne width_loop_1st_8_6 - - ldr r1, [sp] ; load and update dst address - subs r7, r7, #0x10000 - add r0, r0, r2 ; move to next input line - - add r11, r2, #18 ; adding back block width(=8) - pld [r0, r11] ; preload next low - - add r1, r1, #2 ; move over to next column - str r1, [sp] - - bne height_loop_1st_8_6 - - add sp, sp, #4 - ldmia sp!, {r4 - r11, pc} - - ENDP - -;--------------------------------- -; r0 short *src_ptr, -; r1 unsigned char *output_ptr, -; r2 unsigned int output_pitch, -; r3 unsigned int cnt, -; stack const short *vp9_filter -;--------------------------------- -|vp9_filter_block2d_second_pass_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #36] ; vp9_filter address - sub sp, sp, #4 - mov r7, r3, lsl #16 ; height is top part of counter - str r1, [sp] ; push destination to stack - - ldr r4, [r11] ; load up packed filter coefficients - ldr r5, [r11, #4] - ldr r6, [r11, #8] - - pkhbt r12, r5, r4 ; pack the filter differently - pkhbt r11, r6, r5 - - sub r0, r0, #4 ; offset input buffer - -|height_loop_2nd| - ldr r8, [r0] ; load the data - ldr r9, [r0, #4] - orr r7, r7, r3, lsr #1 ; loop counter - -|width_loop_2nd| - smuad lr, r4, r8 ; apply filter - sub r7, r7, #1 - smulbt r8, r4, r8 - - ldr r10, [r0, #8] - - smlad lr, r5, r9, lr - smladx r8, r12, r9, r8 - - ldrh r9, [r0, #12] - - smlad lr, r6, r10, lr - smladx r8, r11, r10, r8 - - add r0, r0, #4 - smlatb r10, r6, r9, r8 - - add lr, lr, #0x40 ; round_shift_and_clamp - ands r8, r7, #0xff - usat lr, #8, lr, asr #7 - add r10, r10, #0x40 - strb lr, [r1], r2 ; the result is transposed back and stored - usat r10, #8, r10, asr #7 - - ldrne r8, [r0] ; load data for next loop - ldrne r9, [r0, #4] - strb r10, [r1], r2 - - bne width_loop_2nd - - ldr r1, [sp] ; update dst for next loop - subs r7, r7, #0x10000 - add r0, r0, #16 ; updata src for next loop - add r1, r1, #1 - str r1, [sp] - - bne height_loop_2nd - - add sp, sp, #4 - ldmia sp!, {r4 - r11, pc} - - ENDP - -;--------------------------------- -; r0 short *src_ptr, -; r1 unsigned char *output_ptr, -; r2 unsigned int output_pitch, -; r3 unsigned int cnt, -; stack const short *vp9_filter -;--------------------------------- -|vp9_filter4_block2d_second_pass_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #36] ; vp9_filter address - mov r7, r3, lsl #16 ; height is top part of counter - - ldr r4, [r11] ; load up packed filter coefficients - add lr, r1, r3 ; save final destination pointer - ldr r5, [r11, #4] - ldr r6, [r11, #8] - - pkhbt r12, r5, r4 ; pack the filter differently - pkhbt r11, r6, r5 - mov r4, #0x40 ; rounding factor (for smlad{x}) - -|height_loop_2nd_4| - ldrd r8, [r0, #-4] ; load the data - orr r7, r7, r3, lsr #1 ; loop counter - -|width_loop_2nd_4| - ldr r10, [r0, #4]! 
- smladx r6, r9, r12, r4 ; apply filter - pkhbt r8, r9, r8 - smlad r5, r8, r12, r4 - pkhbt r8, r10, r9 - smladx r6, r10, r11, r6 - sub r7, r7, #1 - smlad r5, r8, r11, r5 - - mov r8, r9 ; shift the data for the next loop - mov r9, r10 - - usat r6, #8, r6, asr #7 ; shift and clamp - usat r5, #8, r5, asr #7 - - strb r5, [r1], r2 ; the result is transposed back and stored - tst r7, #0xff - strb r6, [r1], r2 - - bne width_loop_2nd_4 - - subs r7, r7, #0x10000 - add r0, r0, #16 ; update src for next loop - sub r1, lr, r7, lsr #16 ; update dst for next loop - - bne height_loop_2nd_4 - - ldmia sp!, {r4 - r11, pc} - - ENDP - -;------------------------------------ -; r0 unsigned char *src_ptr -; r1 unsigned char *output_ptr, -; r2 unsigned int src_pixels_per_line -; r3 unsigned int cnt, -; stack unsigned int output_pitch, -; stack const short *vp9_filter -;------------------------------------ -|vp9_filter_block2d_first_pass_only_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - add r7, r2, r3 ; preload next low - add r7, r7, #2 - pld [r0, r7] - - ldr r4, [sp, #36] ; output pitch - ldr r11, [sp, #40] ; HFilter address - sub sp, sp, #8 - - mov r7, r3 - sub r2, r2, r3 ; inside loop increments input array, - ; so the height loop only needs to add - ; r2 - width to the input pointer - - sub r4, r4, r3 - str r4, [sp] ; save modified output pitch - str r2, [sp, #4] - - mov r2, #0x40 - - ldr r4, [r11] ; load up packed filter coefficients - ldr r5, [r11, #4] - ldr r6, [r11, #8] - -; six tap filter -|height_loop_1st_only_6| - ldrb r8, [r0, #-2] ; load data - ldrb r9, [r0, #-1] - ldrb r10, [r0], #2 - - mov r12, r3, lsr #1 ; loop counter - -|width_loop_1st_only_6| - ldrb r11, [r0, #-1] - - pkhbt lr, r8, r9, lsl #16 ; r9 | r8 - pkhbt r8, r9, r10, lsl #16 ; r10 | r9 - - ldrb r9, [r0] - -;; smuad lr, lr, r4 - smlad lr, lr, r4, r2 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 -;; smuad r8, r8, r4 - smlad r8, r8, r4, r2 - pkhbt r11, r11, r9, lsl #16 ; r9 | r11 - - smlad lr, r10, r5, lr - ldrb r10, [r0, #1] - smlad r8, r11, r5, r8 - ldrb r11, [r0, #2] - - subs r12, r12, #1 - - pkhbt r9, r9, r10, lsl #16 ; r10 | r9 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - - smlad lr, r9, r6, lr - smlad r10, r10, r6, r8 - -;; add lr, lr, #0x40 ; round_shift_and_clamp - ldrneb r8, [r0, #-2] ; load data for next loop - usat lr, #8, lr, asr #7 -;; add r10, r10, #0x40 - strb lr, [r1], #1 ; store the result - usat r10, #8, r10, asr #7 - - ldrneb r9, [r0, #-1] - strb r10, [r1], #1 - ldrneb r10, [r0], #2 - - bne width_loop_1st_only_6 - - ldr lr, [sp] ; load back output pitch - ldr r12, [sp, #4] ; load back output pitch - subs r7, r7, #1 - add r0, r0, r12 ; updata src for next loop - - add r11, r12, r3 ; preload next low - add r11, r11, #2 - pld [r0, r11] - - add r1, r1, lr ; update dst for next loop - - bne height_loop_1st_only_6 - - add sp, sp, #8 - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_filter_block2d_first_pass_only_armv6| - - -;------------------------------------ -; r0 unsigned char *src_ptr, -; r1 unsigned char *output_ptr, -; r2 unsigned int src_pixels_per_line -; r3 unsigned int cnt, -; stack unsigned int output_pitch, -; stack const short *vp9_filter -;------------------------------------ -|vp9_filter_block2d_second_pass_only_armv6| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r11, [sp, #40] ; VFilter address - ldr r12, [sp, #36] ; output pitch - - mov r7, r3, lsl #16 ; height is top part of counter - sub r0, r0, r2, lsl #1 ; need 6 elements for filtering, 2 before, 3 after - - sub sp, sp, #8 - - ldr r4, [r11] ; load up packed filter 
coefficients - ldr r5, [r11, #4] - ldr r6, [r11, #8] - - str r0, [sp] ; save r0 to stack - str r1, [sp, #4] ; save dst to stack - -; six tap filter -|width_loop_2nd_only_6| - ldrb r8, [r0], r2 ; load data - orr r7, r7, r3 ; loop counter - ldrb r9, [r0], r2 - ldrb r10, [r0], r2 - -|height_loop_2nd_only_6| - ; filter first column in this inner loop, than, move to next colum. - ldrb r11, [r0], r2 - - pkhbt lr, r8, r9, lsl #16 ; r9 | r8 - pkhbt r8, r9, r10, lsl #16 ; r10 | r9 - - ldrb r9, [r0], r2 - - smuad lr, lr, r4 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - smuad r8, r8, r4 - pkhbt r11, r11, r9, lsl #16 ; r9 | r11 - - smlad lr, r10, r5, lr - ldrb r10, [r0], r2 - smlad r8, r11, r5, r8 - ldrb r11, [r0] - - sub r7, r7, #2 - sub r0, r0, r2, lsl #2 - - pkhbt r9, r9, r10, lsl #16 ; r10 | r9 - pkhbt r10, r10, r11, lsl #16 ; r11 | r10 - - smlad lr, r9, r6, lr - smlad r10, r10, r6, r8 - - ands r9, r7, #0xff - - add lr, lr, #0x40 ; round_shift_and_clamp - ldrneb r8, [r0], r2 ; load data for next loop - usat lr, #8, lr, asr #7 - add r10, r10, #0x40 - strb lr, [r1], r12 ; store the result for the column - usat r10, #8, r10, asr #7 - - ldrneb r9, [r0], r2 - strb r10, [r1], r12 - ldrneb r10, [r0], r2 - - bne height_loop_2nd_only_6 - - ldr r0, [sp] - ldr r1, [sp, #4] - subs r7, r7, #0x10000 - add r0, r0, #1 ; move to filter next column - str r0, [sp] - add r1, r1, #1 - str r1, [sp, #4] - - bne width_loop_2nd_only_6 - - add sp, sp, #8 - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_filter_block2d_second_pass_only_armv6| - - END diff --git a/vp9/common/arm/armv6/vp9_idct_v6.asm b/vp9/common/arm/armv6/vp9_idct_v6.asm deleted file mode 100644 index 27215afcd..000000000 --- a/vp9/common/arm/armv6/vp9_idct_v6.asm +++ /dev/null @@ -1,345 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -; r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r14 - EXPORT |vp8_short_idct4x4llm_1_v6| - EXPORT |vp8_short_idct4x4llm_v6| - EXPORT |vp8_short_idct4x4llm_v6_scott| - EXPORT |vp8_short_idct4x4llm_v6_dual| - - AREA |.text|, CODE, READONLY - -;******************************************************************************** -;* void short_idct4x4llm_1_v6(INT16 * input, INT16 * output, INT32 pitch) -;* r0 INT16 * input -;* r1 INT16 * output -;* r2 INT32 pitch -;* bench: 3/5 -;******************************************************************************** - -|vp8_short_idct4x4llm_1_v6| PROC ; cycles in out pit - ; - ldrsh r0, [r0] ; load input[0] 1, r0 un 2 - add r0, r0, #4 ; 1 +4 - stmdb sp!, {r4, r5, lr} ; make room for wide writes 1 backup - mov r0, r0, asr #3 ; (input[0] + 4) >> 3 1, r0 req`d ^1 >> 3 - pkhbt r4, r0, r0, lsl #16 ; pack r0 into r4 1, r0 req`d ^1 pack - mov r5, r4 ; expand expand - - strd r4, [r1], r2 ; *output = r0, post inc 1 - strd r4, [r1], r2 ; 1 - strd r4, [r1], r2 ; 1 - strd r4, [r1] ; 1 - ; - ldmia sp!, {r4, r5, pc} ; replace vars, return restore - ENDP ; |vp8_short_idct4x4llm_1_v6| -;******************************************************************************** -;******************************************************************************** -;******************************************************************************** - -;******************************************************************************** -;* void short_idct4x4llm_v6(INT16 * input, INT16 * output, INT32 pitch) -;* r0 INT16 * input -;* r1 INT16 * output -;* r2 INT32 pitch -;* bench: -;******************************************************************************** - -|vp8_short_idct4x4llm_v6| PROC ; cycles in out pit - ; - stmdb sp!, {r4-r11, lr} ; backup registers 1 backup - ; - mov r4, #0x00004E00 ; 1 cst - orr r4, r4, #0x0000007B ; cospi8sqrt2minus1 - mov r5, #0x00008A00 ; 1 cst - orr r5, r5, #0x0000008C ; sinpi8sqrt2 - ; - mov r6, #4 ; i=4 1 i -loop1 ; - ldrsh r12, [r0, #8] ; input[4] 1, r12 unavail 2 [4] - ldrsh r3, [r0, #24] ; input[12] 1, r3 unavail 2 [12] - ldrsh r8, [r0, #16] ; input[8] 1, r8 unavail 2 [8] - ldrsh r7, [r0], #0x2 ; input[0] 1, r7 unavail 2 ++ [0] - smulwb r10, r5, r12 ; ([4] * sinpi8sqrt2) >> 16 1, r10 un 2, r12/r5 ^1 t1 - smulwb r11, r4, r3 ; ([12] * cospi8sqrt2minus1) >> 16 1, r11 un 2, r3/r4 ^1 t2 - add r9, r7, r8 ; a1 = [0] + [8] 1 a1 - sub r7, r7, r8 ; b1 = [0] - [8] 1 b1 - add r11, r3, r11 ; temp2 1 - rsb r11, r11, r10 ; c1 = temp1 - temp2 1 c1 - smulwb r3, r5, r3 ; ([12] * sinpi8sqrt2) >> 16 1, r3 un 2, r3/r5 ^ 1 t2 - smulwb r10, r4, r12 ; ([4] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r12/r4 ^1 t1 - add r8, r7, r11 ; b1 + c1 1 b+c - strh r8, [r1, r2] ; out[pitch] = b1+c1 1 - sub r7, r7, r11 ; b1 - c1 1 b-c - add r10, r12, r10 ; temp1 1 - add r3, r10, r3 ; d1 = temp1 + temp2 1 d1 - add r10, r9, r3 ; a1 + d1 1 a+d - sub r3, r9, r3 ; a1 - d1 1 a-d - add r8, r2, r2 ; pitch * 2 1 p*2 - strh r7, [r1, r8] ; out[pitch*2] = b1-c1 1 - add r7, r2, r2, lsl #1 ; pitch * 3 1 p*3 - strh r3, [r1, r7] ; out[pitch*3] = a1-d1 1 - subs r6, r6, #1 ; i-- 1 -- - strh r10, [r1], #0x2 ; out[0] = a1+d1 1 ++ - bne loop1 ; if i>0, continue - ; - sub r1, r1, #8 ; set up out for next loop 1 -4 - ; for this iteration, input=prev output - mov r6, #4 ; i=4 1 i -; b returnfull -loop2 ; - ldrsh r11, [r1, #2] ; input[1] 1, r11 un 2 [1] - ldrsh r8, [r1, #6] ; input[3] 1, r8 un 2 [3] - ldrsh r3, [r1, #4] ; input[2] 1, r3 un 2 [2] - ldrsh r0, [r1] ; input[0] 1, r0 un 2 [0] - smulwb r9, r5, r11 ; ([1] 
* sinpi8sqrt2) >> 16 1, r9 un 2, r5/r11 ^1 t1 - smulwb r10, r4, r8 ; ([3] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r4/r8 ^1 t2 - add r7, r0, r3 ; a1 = [0] + [2] 1 a1 - sub r0, r0, r3 ; b1 = [0] - [2] 1 b1 - add r10, r8, r10 ; temp2 1 - rsb r9, r10, r9 ; c1 = temp1 - temp2 1 c1 - smulwb r8, r5, r8 ; ([3] * sinpi8sqrt2) >> 16 1, r8 un 2, r5/r8 ^1 t2 - smulwb r10, r4, r11 ; ([1] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r4/r11 ^1 t1 - add r3, r0, r9 ; b1+c1 1 b+c - add r3, r3, #4 ; b1+c1+4 1 +4 - add r10, r11, r10 ; temp1 1 - mov r3, r3, asr #3 ; b1+c1+4 >> 3 1, r3 ^1 >>3 - strh r3, [r1, #2] ; out[1] = b1+c1 1 - add r10, r10, r8 ; d1 = temp1 + temp2 1 d1 - add r3, r7, r10 ; a1+d1 1 a+d - add r3, r3, #4 ; a1+d1+4 1 +4 - sub r7, r7, r10 ; a1-d1 1 a-d - add r7, r7, #4 ; a1-d1+4 1 +4 - mov r3, r3, asr #3 ; a1+d1+4 >> 3 1, r3 ^1 >>3 - mov r7, r7, asr #3 ; a1-d1+4 >> 3 1, r7 ^1 >>3 - strh r7, [r1, #6] ; out[3] = a1-d1 1 - sub r0, r0, r9 ; b1-c1 1 b-c - add r0, r0, #4 ; b1-c1+4 1 +4 - subs r6, r6, #1 ; i-- 1 -- - mov r0, r0, asr #3 ; b1-c1+4 >> 3 1, r0 ^1 >>3 - strh r0, [r1, #4] ; out[2] = b1-c1 1 - strh r3, [r1], r2 ; out[0] = a1+d1 1 -; add r1, r1, r2 ; out += pitch 1 ++ - bne loop2 ; if i>0, continue -returnfull ; - ldmia sp!, {r4 - r11, pc} ; replace vars, return restore - ENDP - -;******************************************************************************** -;******************************************************************************** -;******************************************************************************** - -;******************************************************************************** -;* void short_idct4x4llm_v6_scott(INT16 * input, INT16 * output, INT32 pitch) -;* r0 INT16 * input -;* r1 INT16 * output -;* r2 INT32 pitch -;* bench: -;******************************************************************************** - -|vp8_short_idct4x4llm_v6_scott| PROC ; cycles in out pit -; mov r0, #0 ; -; ldr r0, [r0] ; - stmdb sp!, {r4 - r11, lr} ; backup registers 1 backup - ; - mov r3, #0x00004E00 ; cos - orr r3, r3, #0x0000007B ; cospi8sqrt2minus1 - mov r4, #0x00008A00 ; sin - orr r4, r4, #0x0000008C ; sinpi8sqrt2 - ; - mov r5, #0x2 ; i i - ; -short_idct4x4llm_v6_scott_loop1 ; - ldr r10, [r0, #(4*2)] ; i5 | i4 5,4 - ldr r11, [r0, #(12*2)] ; i13 | i12 13,12 - ; - smulwb r6, r4, r10 ; ((ip[4] * sinpi8sqrt2) >> 16) lt1 - smulwb r7, r3, r11 ; ((ip[12] * cospi8sqrt2minus1) >> 16) lt2 - ; - smulwb r12, r3, r10 ; ((ip[4] * cospi8sqrt2misu1) >> 16) l2t2 - smulwb r14, r4, r11 ; ((ip[12] * sinpi8sqrt2) >> 16) l2t1 - ; - add r6, r6, r7 ; partial c1 lt1-lt2 - add r12, r12, r14 ; partial d1 l2t2+l2t1 - ; - smulwt r14, r4, r10 ; ((ip[5] * sinpi8sqrt2) >> 16) ht1 - smulwt r7, r3, r11 ; ((ip[13] * cospi8sqrt2minus1) >> 16) ht2 - ; - smulwt r8, r3, r10 ; ((ip[5] * cospi8sqrt2minus1) >> 16) h2t1 - smulwt r9, r4, r11 ; ((ip[13] * sinpi8sqrt2) >> 16) h2t2 - ; - add r7, r14, r7 ; partial c1_2 ht1+ht2 - sub r8, r8, r9 ; partial d1_2 h2t1-h2t2 - ; - pkhbt r6, r6, r7, lsl #16 ; partial c1_2 | partial c1_1 pack - pkhbt r12, r12, r8, lsl #16 ; partial d1_2 | partial d1_1 pack - ; - usub16 r6, r6, r10 ; c1_2 | c1_1 c - uadd16 r12, r12, r11 ; d1_2 | d1_1 d - ; - ldr r10, [r0, #0] ; i1 | i0 1,0 - ldr r11, [r0, #(8*2)] ; i9 | i10 9,10 - ; -;;;;;; add r0, r0, #0x4 ; +4 -;;;;;; add r1, r1, #0x4 ; +4 - ; - uadd16 r8, r10, r11 ; i1 + i9 | i0 + i8 aka a1 a - usub16 r9, r10, r11 ; i1 - i9 | i0 - i8 aka b1 b - ; - uadd16 r7, r8, r12 ; a1 + d1 pair a+d - usub16 r14, r8, r12 ; a1 - d1 pair a-d - ; - str r7, [r1] ; op[0] = 
a1 + d1 - str r14, [r1, r2] ; op[pitch*3] = a1 - d1 - ; - add r0, r0, #0x4 ; op[pitch] = b1 + c1 ++ - add r1, r1, #0x4 ; op[pitch*2] = b1 - c1 ++ - ; - subs r5, r5, #0x1 ; -- - bne short_idct4x4llm_v6_scott_loop1 ; - ; - sub r1, r1, #16 ; reset output ptr - mov r5, #0x4 ; - mov r0, r1 ; input = output - ; -short_idct4x4llm_v6_scott_loop2 ; - ; - subs r5, r5, #0x1 ; - bne short_idct4x4llm_v6_scott_loop2 ; - ; - ldmia sp!, {r4 - r11, pc} ; - ENDP ; - ; -;******************************************************************************** -;******************************************************************************** -;******************************************************************************** - -;******************************************************************************** -;* void short_idct4x4llm_v6_dual(INT16 * input, INT16 * output, INT32 pitch) -;* r0 INT16 * input -;* r1 INT16 * output -;* r2 INT32 pitch -;* bench: -;******************************************************************************** - -|vp8_short_idct4x4llm_v6_dual| PROC ; cycles in out pit - ; - stmdb sp!, {r4-r11, lr} ; backup registers 1 backup - mov r3, #0x00004E00 ; cos - orr r3, r3, #0x0000007B ; cospi8sqrt2minus1 - mov r4, #0x00008A00 ; sin - orr r4, r4, #0x0000008C ; sinpi8sqrt2 - mov r5, #0x2 ; i=2 i -loop1_dual - ldr r6, [r0, #(4*2)] ; i5 | i4 5|4 - ldr r12, [r0, #(12*2)] ; i13 | i12 13|12 - ldr r14, [r0, #(8*2)] ; i9 | i8 9|8 - - smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c - smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c - smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s - smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s - pkhbt r7, r7, r9, lsl #16 ; 5c | 4c - smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c - pkhbt r8, r8, r10, lsl #16 ; 5s | 4s - uadd16 r6, r6, r7 ; 5c+5 | 4c+4 - smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s - smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c - smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s - subs r5, r5, #0x1 ; i-- -- - pkhbt r9, r9, r11, lsl #16 ; 13c | 12c - ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0 - pkhbt r10, r10, r7, lsl #16 ; 13s | 12s - uadd16 r7, r12, r9 ; 13c+13 | 12c+12 - usub16 r7, r8, r7 ; c c - uadd16 r6, r6, r10 ; d d - uadd16 r10, r11, r14 ; a a - usub16 r8, r11, r14 ; b b - uadd16 r9, r10, r6 ; a+d a+d - usub16 r10, r10, r6 ; a-d a-d - uadd16 r6, r8, r7 ; b+c b+c - usub16 r7, r8, r7 ; b-c b-c - str r6, [r1, r2] ; o5 | o4 - add r6, r2, r2 ; pitch * 2 p2 - str r7, [r1, r6] ; o9 | o8 - add r6, r6, r2 ; pitch * 3 p3 - str r10, [r1, r6] ; o13 | o12 - str r9, [r1], #0x4 ; o1 | o0 ++ - bne loop1_dual ; - mov r5, #0x2 ; i=2 i - sub r0, r1, #8 ; reset input/output i/o -loop2_dual - ldr r6, [r0, r2] ; i5 | i4 5|4 - ldr r1, [r0] ; i1 | i0 1|0 - ldr r12, [r0, #0x4] ; i3 | i2 3|2 - add r14, r2, #0x4 ; pitch + 2 p+2 - ldr r14, [r0, r14] ; i7 | i6 7|6 - smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c - smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c - smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s - smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s - pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4 - pkhbt r7, r9, r7, lsl #16 ; 1c | 5c - pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1 - pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5 - uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2 - pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6 - uadd16 r10, r11, r9 ; a a - usub16 r9, r11, r9 ; b b - pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7 - subs r5, r5, #0x1 ; i-- -- - smulwt r7, r3, r6 ; (ip[3] * 
cospi8sqrt2minus1) >> 16 3c - smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s - smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c - smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s - - pkhbt r7, r12, r7, lsl #16 ; 3c | 7c - pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1 - uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2 - usub16 r12, r8, r6 ; c (o1 | o5) c - uadd16 r6, r11, r1 ; d (o3 | o7) d - uadd16 r7, r10, r6 ; a+d a+d - mov r8, #0x4 ; set up 4's 4 - orr r8, r8, #0x40000 ; 4|4 - usub16 r6, r10, r6 ; a-d a-d - uadd16 r6, r6, r8 ; a-d+4 3|7 - uadd16 r7, r7, r8 ; a+d+4 0|4 - uadd16 r10, r9, r12 ; b+c b+c - usub16 r1, r9, r12 ; b-c b-c - uadd16 r10, r10, r8 ; b+c+4 1|5 - uadd16 r1, r1, r8 ; b-c+4 2|6 - mov r8, r10, asr #19 ; o1 >> 3 - strh r8, [r0, #2] ; o1 - mov r8, r1, asr #19 ; o2 >> 3 - strh r8, [r0, #4] ; o2 - mov r8, r6, asr #19 ; o3 >> 3 - strh r8, [r0, #6] ; o3 - mov r8, r7, asr #19 ; o0 >> 3 - strh r8, [r0], r2 ; o0 +p - sxth r10, r10 ; - mov r8, r10, asr #3 ; o5 >> 3 - strh r8, [r0, #2] ; o5 - sxth r1, r1 ; - mov r8, r1, asr #3 ; o6 >> 3 - strh r8, [r0, #4] ; o6 - sxth r6, r6 ; - mov r8, r6, asr #3 ; o7 >> 3 - strh r8, [r0, #6] ; o7 - sxth r7, r7 ; - mov r8, r7, asr #3 ; o4 >> 3 - strh r8, [r0], r2 ; o4 +p -;;;;; subs r5, r5, #0x1 ; i-- -- - bne loop2_dual ; - ; - ldmia sp!, {r4 - r11, pc} ; replace vars, return restore - ENDP - - END diff --git a/vp9/common/arm/armv6/vp9_iwalsh_v6.asm b/vp9/common/arm/armv6/vp9_iwalsh_v6.asm deleted file mode 100644 index 463bff0f5..000000000 --- a/vp9/common/arm/armv6/vp9_iwalsh_v6.asm +++ /dev/null @@ -1,152 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
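For readers reviewing this deletion: the vp8_short_idct4x4llm* routines removed above are hand-scheduled ARMv6 builds of VP8's 4x4 inverse DCT. The constants materialized with mov/orr are Q16 fixed point: 0x8A8C = 35468, about sin(pi/8)*sqrt(2), and 0x4E7B = 20091, about cos(pi/8)*sqrt(2) - 1, exactly the "(ip[n] * sinpi8sqrt2) >> 16" terms in the comments. A minimal C sketch of the transform they compute follows; the function name and the flat 16-element input/output layout are illustrative (the assembly writes the second pass through pitch, which is omitted here):

#include <stdint.h>

/* Q16 constants seen in the asm as 0x8A8C and 0x4E7B. */
static const int sinpi8sqrt2 = 35468;
static const int cospi8sqrt2minus1 = 20091;

static void idct4x4llm_sketch(const int16_t *in, int16_t *out) {
  int tmp[16];

  for (int c = 0; c < 4; ++c) {            /* column pass, stride 4 */
    int a1 = in[c] + in[c + 8];
    int b1 = in[c] - in[c + 8];
    int t1 = (in[c + 4] * sinpi8sqrt2) >> 16;
    int t2 = in[c + 12] + ((in[c + 12] * cospi8sqrt2minus1) >> 16);
    int c1 = t1 - t2;
    t1 = in[c + 4] + ((in[c + 4] * cospi8sqrt2minus1) >> 16);
    t2 = (in[c + 12] * sinpi8sqrt2) >> 16;
    int d1 = t1 + t2;
    tmp[c]      = a1 + d1;
    tmp[c + 4]  = b1 + c1;
    tmp[c + 8]  = b1 - c1;
    tmp[c + 12] = a1 - d1;
  }
  for (int r = 0; r < 4; ++r) {            /* row pass, with +4 >> 3 rounding */
    const int *ip = tmp + 4 * r;
    int a1 = ip[0] + ip[2];
    int b1 = ip[0] - ip[2];
    int t1 = (ip[1] * sinpi8sqrt2) >> 16;
    int t2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
    int c1 = t1 - t2;
    t1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
    t2 = (ip[3] * sinpi8sqrt2) >> 16;
    int d1 = t1 + t2;
    out[4 * r + 0] = (int16_t)((a1 + d1 + 4) >> 3);
    out[4 * r + 1] = (int16_t)((b1 + c1 + 4) >> 3);
    out[4 * r + 2] = (int16_t)((b1 - c1 + 4) >> 3);
    out[4 * r + 3] = (int16_t)((a1 - d1 + 4) >> 3);
  }
}

The "+4 ... asr #3" sequences in the assembly are the rounding shifts of the row pass; the _dual variant interleaves two columns per 32-bit register with uadd16/usub16 to halve the loop count.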
-; - - EXPORT |vp8_short_inv_walsh4x4_v6| - EXPORT |vp8_short_inv_walsh4x4_1_v6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code - -;short vp8_short_inv_walsh4x4_v6(short *input, short *output) -|vp8_short_inv_walsh4x4_v6| PROC - - stmdb sp!, {r4 - r11, lr} - - ldr r2, [r0], #4 ; [1 | 0] - ldr r3, [r0], #4 ; [3 | 2] - ldr r4, [r0], #4 ; [5 | 4] - ldr r5, [r0], #4 ; [7 | 6] - ldr r6, [r0], #4 ; [9 | 8] - ldr r7, [r0], #4 ; [11 | 10] - ldr r8, [r0], #4 ; [13 | 12] - ldr r9, [r0] ; [15 | 14] - - qadd16 r10, r2, r8 ; a1 [1+13 | 0+12] - qadd16 r11, r4, r6 ; b1 [5+9 | 4+8] - qsub16 r12, r4, r6 ; c1 [5-9 | 4-8] - qsub16 lr, r2, r8 ; d1 [1-13 | 0-12] - - qadd16 r2, r10, r11 ; a1 + b1 [1 | 0] - qadd16 r4, r12, lr ; c1 + d1 [5 | 4] - qsub16 r6, r10, r11 ; a1 - b1 [9 | 8] - qsub16 r8, lr, r12 ; d1 - c1 [13 | 12] - - qadd16 r10, r3, r9 ; a1 [3+15 | 2+14] - qadd16 r11, r5, r7 ; b1 [7+11 | 6+10] - qsub16 r12, r5, r7 ; c1 [7-11 | 6-10] - qsub16 lr, r3, r9 ; d1 [3-15 | 2-14] - - qadd16 r3, r10, r11 ; a1 + b1 [3 | 2] - qadd16 r5, r12, lr ; c1 + d1 [7 | 6] - qsub16 r7, r10, r11 ; a1 - b1 [11 | 10] - qsub16 r9, lr, r12 ; d1 - c1 [15 | 14] - - ; first transform complete - - qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3] - qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3] - qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7] - qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7] - - qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1] - qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1] - ldr r10, c0x00030003 - qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1] - qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1] - - qadd16 r2, r2, r10 ; [b2+3|c2+3] - qadd16 r3, r3, r10 ; [a2+3|d2+3] - qadd16 r4, r4, r10 ; [b2+3|c2+3] - qadd16 r5, r5, r10 ; [a2+3|d2+3] - - asr r12, r2, #3 ; [1 | x] - pkhtb r12, r12, r3, asr #19; [1 | 0] - lsl lr, r3, #16 ; [~3 | x] - lsl r2, r2, #16 ; [~2 | x] - asr lr, lr, #3 ; [3 | x] - pkhtb lr, lr, r2, asr #19 ; [3 | 2] - - asr r2, r4, #3 ; [5 | x] - pkhtb r2, r2, r5, asr #19 ; [5 | 4] - lsl r3, r5, #16 ; [~7 | x] - lsl r4, r4, #16 ; [~6 | x] - asr r3, r3, #3 ; [7 | x] - pkhtb r3, r3, r4, asr #19 ; [7 | 6] - - str r12, [r1], #4 - str lr, [r1], #4 - str r2, [r1], #4 - str r3, [r1], #4 - - qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11] - qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11] - qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15] - qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15] - - qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1] - qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1] - qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1] - qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1] - - qadd16 r6, r6, r10 ; [b2+3|c2+3] - qadd16 r7, r7, r10 ; [a2+3|d2+3] - qadd16 r8, r8, r10 ; [b2+3|c2+3] - qadd16 r9, r9, r10 ; [a2+3|d2+3] - - asr r2, r6, #3 ; [9 | x] - pkhtb r2, r2, r7, asr #19 ; [9 | 8] - lsl r3, r7, #16 ; [~11| x] - lsl r4, r6, #16 ; [~10| x] - asr r3, r3, #3 ; [11 | x] - pkhtb r3, r3, r4, asr #19 ; [11 | 10] - - asr r4, r8, #3 ; [13 | x] - pkhtb r4, r4, r9, asr #19 ; [13 | 12] - lsl r5, r9, #16 ; [~15| x] - lsl r6, r8, #16 ; [~14| x] - asr r5, r5, #3 ; [15 | x] - pkhtb r5, r5, r6, asr #19 ; [15 | 14] - - str r2, [r1], #4 - str r3, [r1], #4 - str r4, [r1], #4 - str r5, [r1] - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_short_inv_walsh4x4_v6| - - -;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output) -|vp8_short_inv_walsh4x4_1_v6| PROC - - ldrsh r2, [r0] ; [0] - add r2, r2, #3 ; [0] + 3 - asr r2, r2, #3 ; a1 ([0]+3) >> 3 - lsl r2, r2, #16 ; [a1 | x] - orr r2, r2, r2, lsr #16 ; [a1 | a1] - - str r2, [r1], #4 
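The routine above packs two 16-bit coefficients per register, so each qadd16/qsub16 performs two butterflies at once, and the qsubaddx/qaddsubx pairs do the cross-wise butterflies of the horizontal pass. The underlying 4x4 inverse Walsh-Hadamard transform, written close to the C reference the library falls back to (a sketch; the asm additionally packs the stores two outputs per word):

#include <stdint.h>

static void inv_walsh4x4_sketch(const int16_t *ip, int16_t *op) {
  int a1, b1, c1, d1;
  int tmp[16];

  for (int i = 0; i < 4; ++i) {            /* vertical butterflies */
    a1 = ip[i] + ip[i + 12];
    b1 = ip[i + 4] + ip[i + 8];
    c1 = ip[i + 4] - ip[i + 8];
    d1 = ip[i] - ip[i + 12];
    tmp[i]      = a1 + b1;
    tmp[i + 4]  = c1 + d1;
    tmp[i + 8]  = a1 - b1;
    tmp[i + 12] = d1 - c1;
  }
  for (int i = 0; i < 4; ++i) {            /* horizontal, with +3 >> 3 rounding */
    const int *r = tmp + 4 * i;
    a1 = r[0] + r[3];
    b1 = r[1] + r[2];
    c1 = r[1] - r[2];
    d1 = r[0] - r[3];
    op[4 * i + 0] = (int16_t)((a1 + b1 + 3) >> 3);
    op[4 * i + 1] = (int16_t)((c1 + d1 + 3) >> 3);
    op[4 * i + 2] = (int16_t)((a1 - b1 + 3) >> 3);
    op[4 * i + 3] = (int16_t)((d1 - c1 + 3) >> 3);
  }
}

The c0x00030003 pool constant is the packed "+3" rounding bias, and the _1 variant that follows handles the DC-only case by broadcasting (input[0] + 3) >> 3 to all sixteen outputs.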
- str r2, [r1], #4 - str r2, [r1], #4 - str r2, [r1], #4 - str r2, [r1], #4 - str r2, [r1], #4 - str r2, [r1], #4 - str r2, [r1] - - bx lr - ENDP ; |vp8_short_inv_walsh4x4_1_v6| - -; Constant Pool -c0x00030003 DCD 0x00030003 - END diff --git a/vp9/common/arm/armv6/vp9_loopfilter_v6.asm b/vp9/common/arm/armv6/vp9_loopfilter_v6.asm deleted file mode 100644 index 37b54a39c..000000000 --- a/vp9/common/arm/armv6/vp9_loopfilter_v6.asm +++ /dev/null @@ -1,1282 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_loop_filter_horizontal_edge_armv6| - EXPORT |vp9_mbloop_filter_horizontal_edge_armv6| - EXPORT |vp9_loop_filter_vertical_edge_armv6| - EXPORT |vp9_mbloop_filter_vertical_edge_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code - - MACRO - TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3 - ; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3 - ; a0: 03 02 01 00 - ; a1: 13 12 11 10 - ; a2: 23 22 21 20 - ; a3: 33 32 31 30 - ; b3 b2 b1 b0 - - uxtb16 $b1, $a1 ; xx 12 xx 10 - uxtb16 $b0, $a0 ; xx 02 xx 00 - uxtb16 $b3, $a3 ; xx 32 xx 30 - uxtb16 $b2, $a2 ; xx 22 xx 20 - orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00 - orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20 - - uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11 - uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31 - uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01 - uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21 - orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01 - orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21 - - pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1 - pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3 - - pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0 - pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2 - MEND - - -src RN r0 -pstep RN r1 -count RN r5 - -;r0 unsigned char *src_ptr, -;r1 int src_pixel_step, -;r2 const char *blimit, -;r3 const char *limit, -;stack const char *thresh, -;stack int count - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -|vp9_loop_filter_horizontal_edge_armv6| PROC -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - stmdb sp!, {r4 - r11, lr} - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - ldr count, [sp, #40] ; count for 8-in-parallel - ldr r6, [sp, #36] ; load thresh address - sub sp, sp, #16 ; create temp buffer - - ldr r9, [src], pstep ; p3 - ldrb r4, [r2] ; blimit - ldr r10, [src], pstep ; p2 - ldrb r2, [r3] ; limit - ldr r11, [src], pstep ; p1 - orr r4, r4, r4, lsl #8 - ldrb r3, [r6] ; thresh - orr r2, r2, r2, lsl #8 - mov count, count, lsl #1 ; 4-in-parallel - orr r4, r4, r4, lsl #16 - orr r3, r3, r3, lsl #8 - orr r2, r2, r2, lsl #16 - orr r3, r3, r3, lsl #16 - -|Hnext8| - ; vp9_filter_mask() function - ; calculate breakout conditions - ldr r12, [src], pstep ; p0 - - uqsub8 r6, r9, r10 ; p3 - p2 - uqsub8 r7, r10, r9 ; p2 - p3 - uqsub8 r8, r10, r11 ; p2 - p1 - uqsub8 r10, r11, r10 ; p1 - p2 - - orr r6, r6, r7 ; abs (p3-p2) - orr r8, r8, r10 ; abs (p2-p1) - uqsub8 lr, r6, r2 ; compare to limit. 
lr: vp9_filter_mask - uqsub8 r8, r8, r2 ; compare to limit - uqsub8 r6, r11, r12 ; p1 - p0 - orr lr, lr, r8 - uqsub8 r7, r12, r11 ; p0 - p1 - ldr r9, [src], pstep ; q0 - ldr r10, [src], pstep ; q1 - orr r6, r6, r7 ; abs (p1-p0) - uqsub8 r7, r6, r2 ; compare to limit - uqsub8 r8, r6, r3 ; compare to thresh -- save r8 for later - orr lr, lr, r7 - - uqsub8 r6, r11, r10 ; p1 - q1 - uqsub8 r7, r10, r11 ; q1 - p1 - uqsub8 r11, r12, r9 ; p0 - q0 - uqsub8 r12, r9, r12 ; q0 - p0 - orr r6, r6, r7 ; abs (p1-q1) - ldr r7, c0x7F7F7F7F - orr r12, r11, r12 ; abs (p0-q0) - ldr r11, [src], pstep ; q2 - uqadd8 r12, r12, r12 ; abs (p0-q0) * 2 - and r6, r7, r6, lsr #1 ; abs (p1-q1) / 2 - uqsub8 r7, r9, r10 ; q0 - q1 - uqadd8 r12, r12, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 - uqsub8 r6, r10, r9 ; q1 - q0 - uqsub8 r12, r12, r4 ; compare to flimit - uqsub8 r9, r11, r10 ; q2 - q1 - - orr lr, lr, r12 - - ldr r12, [src], pstep ; q3 - uqsub8 r10, r10, r11 ; q1 - q2 - orr r6, r7, r6 ; abs (q1-q0) - orr r10, r9, r10 ; abs (q2-q1) - uqsub8 r7, r6, r2 ; compare to limit - uqsub8 r10, r10, r2 ; compare to limit - uqsub8 r6, r6, r3 ; compare to thresh -- save r6 for later - orr lr, lr, r7 - orr lr, lr, r10 - - uqsub8 r10, r12, r11 ; q3 - q2 - uqsub8 r9, r11, r12 ; q2 - q3 - - mvn r11, #0 ; r11 == -1 - - orr r10, r10, r9 ; abs (q3-q2) - uqsub8 r10, r10, r2 ; compare to limit - - mov r12, #0 - orr lr, lr, r10 - sub src, src, pstep, lsl #2 - - usub8 lr, r12, lr ; use usub8 instead of ssub8 - sel lr, r11, r12 ; filter mask: lr - - cmp lr, #0 - beq hskip_filter ; skip filtering - - sub src, src, pstep, lsl #1 ; move src pointer down by 6 lines - - ;vp8_hevmask() function - ;calculate high edge variance - orr r10, r6, r8 ; calculate vp8_hevmask - - ldr r7, [src], pstep ; p1 - - usub8 r10, r12, r10 ; use usub8 instead of ssub8 - sel r6, r12, r11 ; obtain vp8_hevmask: r6 - - ;vp9_filter() function - ldr r8, [src], pstep ; p0 - ldr r12, c0x80808080 - ldr r9, [src], pstep ; q0 - ldr r10, [src], pstep ; q1 - - eor r7, r7, r12 ; p1 offset to convert to a signed value - eor r8, r8, r12 ; p0 offset to convert to a signed value - eor r9, r9, r12 ; q0 offset to convert to a signed value - eor r10, r10, r12 ; q1 offset to convert to a signed value - - str r9, [sp] ; store qs0 temporarily - str r8, [sp, #4] ; store ps0 temporarily - str r10, [sp, #8] ; store qs1 temporarily - str r7, [sp, #12] ; store ps1 temporarily - - qsub8 r7, r7, r10 ; vp9_signed_char_clamp(ps1-qs1) - qsub8 r8, r9, r8 ; vp9_signed_char_clamp(vp9_filter + 3 * ( qs0 - ps0)) - - and r7, r7, r6 ; vp9_filter (r7) &= hev - - qadd8 r7, r7, r8 - ldr r9, c0x03030303 ; r9 = 3 --modified for vp8 - - qadd8 r7, r7, r8 - ldr r10, c0x04040404 - - qadd8 r7, r7, r8 - and r7, r7, lr ; vp9_filter &= mask; - - ;modify code for vp8 -- Filter1 = vp9_filter (r7) - qadd8 r8 , r7 , r9 ; Filter2 (r8) = vp9_signed_char_clamp(vp9_filter+3) - qadd8 r7 , r7 , r10 ; vp9_filter = vp9_signed_char_clamp(vp9_filter+4) - - mov r9, #0 - shadd8 r8 , r8 , r9 ; Filter2 >>= 3 - shadd8 r7 , r7 , r9 ; vp9_filter >>= 3 - shadd8 r8 , r8 , r9 - shadd8 r7 , r7 , r9 - shadd8 lr , r8 , r9 ; lr: Filter2 - shadd8 r7 , r7 , r9 ; r7: filter - - ;usub8 lr, r8, r10 ; s = (s==4)*-1 - ;sel lr, r11, r9 - ;usub8 r8, r10, r8 - ;sel r8, r11, r9 - ;and r8, r8, lr ; -1 for each element that equals 4 - - ;calculate output - ;qadd8 lr, r8, r7 ; u = vp9_signed_char_clamp(s + vp9_filter) - - ldr r8, [sp] ; load qs0 - ldr r9, [sp, #4] ; load ps0 - - ldr r10, c0x01010101 - - qsub8 r8 ,r8, r7 ; u = vp9_signed_char_clamp(qs0 - vp9_filter) - 
qadd8 r9, r9, lr ; u = vp9_signed_char_clamp(ps0 + Filter2) - - ;end of modification for vp8 - - mov lr, #0 - sadd8 r7, r7 , r10 ; vp9_filter += 1 - shadd8 r7, r7, lr ; vp9_filter >>= 1 - - ldr r11, [sp, #12] ; load ps1 - ldr r10, [sp, #8] ; load qs1 - - bic r7, r7, r6 ; vp9_filter &= ~hev - sub src, src, pstep, lsl #2 - - qadd8 r11, r11, r7 ; u = vp9_signed_char_clamp(ps1 + vp9_filter) - qsub8 r10, r10,r7 ; u = vp9_signed_char_clamp(qs1 - vp9_filter) - - eor r11, r11, r12 ; *op1 = u^0x80 - str r11, [src], pstep ; store op1 - eor r9, r9, r12 ; *op0 = u^0x80 - str r9, [src], pstep ; store op0 result - eor r8, r8, r12 ; *oq0 = u^0x80 - str r8, [src], pstep ; store oq0 result - eor r10, r10, r12 ; *oq1 = u^0x80 - str r10, [src], pstep ; store oq1 - - sub src, src, pstep, lsl #1 - -|hskip_filter| - add src, src, #4 - sub src, src, pstep, lsl #2 - - subs count, count, #1 - - ldrne r9, [src], pstep ; p3 - ldrne r10, [src], pstep ; p2 - ldrne r11, [src], pstep ; p1 - - bne Hnext8 - - add sp, sp, #16 - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_loop_filter_horizontal_edge_armv6| - - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -|vp8_mbloop_filter_horizontal_edge_armv6| PROC -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - stmdb sp!, {r4 - r11, lr} - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - ldr count, [sp, #40] ; count for 8-in-parallel - ldr r6, [sp, #36] ; load thresh address - sub sp, sp, #16 ; create temp buffer - - ldr r9, [src], pstep ; p3 - ldrb r4, [r2] ; blimit - ldr r10, [src], pstep ; p2 - ldrb r2, [r3] ; limit - ldr r11, [src], pstep ; p1 - orr r4, r4, r4, lsl #8 - ldrb r3, [r6] ; thresh - orr r2, r2, r2, lsl #8 - mov count, count, lsl #1 ; 4-in-parallel - orr r4, r4, r4, lsl #16 - orr r3, r3, r3, lsl #8 - orr r2, r2, r2, lsl #16 - orr r3, r3, r3, lsl #16 - -|MBHnext8| - - ; vp9_filter_mask() function - ; calculate breakout conditions - ldr r12, [src], pstep ; p0 - - uqsub8 r6, r9, r10 ; p3 - p2 - uqsub8 r7, r10, r9 ; p2 - p3 - uqsub8 r8, r10, r11 ; p2 - p1 - uqsub8 r10, r11, r10 ; p1 - p2 - - orr r6, r6, r7 ; abs (p3-p2) - orr r8, r8, r10 ; abs (p2-p1) - uqsub8 lr, r6, r2 ; compare to limit. 
lr: vp9_filter_mask - uqsub8 r8, r8, r2 ; compare to limit - - uqsub8 r6, r11, r12 ; p1 - p0 - orr lr, lr, r8 - uqsub8 r7, r12, r11 ; p0 - p1 - ldr r9, [src], pstep ; q0 - ldr r10, [src], pstep ; q1 - orr r6, r6, r7 ; abs (p1-p0) - uqsub8 r7, r6, r2 ; compare to limit - uqsub8 r8, r6, r3 ; compare to thresh -- save r8 for later - orr lr, lr, r7 - - uqsub8 r6, r11, r10 ; p1 - q1 - uqsub8 r7, r10, r11 ; q1 - p1 - uqsub8 r11, r12, r9 ; p0 - q0 - uqsub8 r12, r9, r12 ; q0 - p0 - orr r6, r6, r7 ; abs (p1-q1) - ldr r7, c0x7F7F7F7F - orr r12, r11, r12 ; abs (p0-q0) - ldr r11, [src], pstep ; q2 - uqadd8 r12, r12, r12 ; abs (p0-q0) * 2 - and r6, r7, r6, lsr #1 ; abs (p1-q1) / 2 - uqsub8 r7, r9, r10 ; q0 - q1 - uqadd8 r12, r12, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 - uqsub8 r6, r10, r9 ; q1 - q0 - uqsub8 r12, r12, r4 ; compare to flimit - uqsub8 r9, r11, r10 ; q2 - q1 - - orr lr, lr, r12 - - ldr r12, [src], pstep ; q3 - - uqsub8 r10, r10, r11 ; q1 - q2 - orr r6, r7, r6 ; abs (q1-q0) - orr r10, r9, r10 ; abs (q2-q1) - uqsub8 r7, r6, r2 ; compare to limit - uqsub8 r10, r10, r2 ; compare to limit - uqsub8 r6, r6, r3 ; compare to thresh -- save r6 for later - orr lr, lr, r7 - orr lr, lr, r10 - - uqsub8 r10, r12, r11 ; q3 - q2 - uqsub8 r9, r11, r12 ; q2 - q3 - - mvn r11, #0 ; r11 == -1 - - orr r10, r10, r9 ; abs (q3-q2) - uqsub8 r10, r10, r2 ; compare to limit - - mov r12, #0 - - orr lr, lr, r10 - - usub8 lr, r12, lr ; use usub8 instead of ssub8 - sel lr, r11, r12 ; filter mask: lr - - cmp lr, #0 - beq mbhskip_filter ; skip filtering - - ;vp8_hevmask() function - ;calculate high edge variance - sub src, src, pstep, lsl #2 ; move src pointer down by 6 lines - sub src, src, pstep, lsl #1 - - orr r10, r6, r8 - ldr r7, [src], pstep ; p1 - - usub8 r10, r12, r10 - sel r6, r12, r11 ; hev mask: r6 - - ;vp8_mbfilter() function - ;p2, q2 are only needed at the end. Don't need to load them in now. 
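Both horizontal-edge routines share the same per-pixel math, vectorized four pixels wide with the ARMv6 packed ops: uqsub8/orr build the |a - b| terms of the filter mask, usub8/sel turn the saturated comparisons into an all-ones or all-zeros byte mask, and the repeated shadd8-with-zero is a packed arithmetic shift right by one, applied three times for the ">>= 3" steps. A scalar sketch of the inner filter, following VP8's reference filter (pixels are biased into signed range with ^0x80 first; mask and hev are the 0 or -1 flags computed by the breakout code above):

static signed char signed_char_clamp(int t) {
  return (signed char)(t < -128 ? -128 : (t > 127 ? 127 : t));
}

/* One pixel column of the normal loop filter. op1/op0/oq0/oq1 point at the
 * two pixels on each side of the edge; mask/hev are 0 or -1. */
static void loop_filter_sketch(signed char mask, signed char hev,
                               unsigned char *op1, unsigned char *op0,
                               unsigned char *oq0, unsigned char *oq1) {
  signed char ps1 = (signed char)(*op1 ^ 0x80);
  signed char ps0 = (signed char)(*op0 ^ 0x80);
  signed char qs0 = (signed char)(*oq0 ^ 0x80);
  signed char qs1 = (signed char)(*oq1 ^ 0x80);

  signed char filter = signed_char_clamp(ps1 - qs1);
  filter &= hev;                                   /* outer taps only if hev */
  filter = signed_char_clamp(filter + 3 * (qs0 - ps0));
  filter &= mask;

  /* round one side with +4 and the other with +3 */
  signed char Filter1 = signed_char_clamp(filter + 4) >> 3;
  signed char Filter2 = signed_char_clamp(filter + 3) >> 3;
  qs0 = signed_char_clamp(qs0 - Filter1);
  ps0 = signed_char_clamp(ps0 + Filter2);

  signed char u = (signed char)((Filter1 + 1) >> 1); /* half-strength outer taps */
  u &= ~hev;
  *op1 = (unsigned char)(signed_char_clamp(ps1 + u) ^ 0x80);
  *op0 = (unsigned char)(ps0 ^ 0x80);
  *oq0 = (unsigned char)(qs0 ^ 0x80);
  *oq1 = (unsigned char)(signed_char_clamp(qs1 - u) ^ 0x80);
}

The mbloop variant keeps the hev-gated Filter1/Filter2 step, then spreads the remaining correction (the masked filter value with hev cleared, i.e. vp9_filter &= ~hev) across three pixel pairs in roughly 3/7, 2/7 and 1/7 proportions; those are the 27, 18 and 9 multipliers with +63 rounding and an arithmetic >> 7 in the smlabb/ssat sequences:

/* mb-filter tail (sketch): ps2..qs2 are the signed-biased pixels, three deep
 * on each side of the edge. */
static void mb_filter_tail_sketch(signed char filter,
                                  signed char *ps2, signed char *ps1,
                                  signed char *ps0, signed char *qs0,
                                  signed char *qs1, signed char *qs2) {
  signed char u;
  u = signed_char_clamp((27 * filter + 63) >> 7);  /* ~3/7 across the edge */
  *qs0 = signed_char_clamp(*qs0 - u);
  *ps0 = signed_char_clamp(*ps0 + u);
  u = signed_char_clamp((18 * filter + 63) >> 7);  /* ~2/7 */
  *qs1 = signed_char_clamp(*qs1 - u);
  *ps1 = signed_char_clamp(*ps1 + u);
  u = signed_char_clamp((9 * filter + 63) >> 7);   /* ~1/7 */
  *qs2 = signed_char_clamp(*qs2 - u);
  *ps2 = signed_char_clamp(*ps2 + u);
}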
- ldr r8, [src], pstep ; p0 - ldr r12, c0x80808080 - ldr r9, [src], pstep ; q0 - ldr r10, [src] ; q1 - - eor r7, r7, r12 ; ps1 - eor r8, r8, r12 ; ps0 - eor r9, r9, r12 ; qs0 - eor r10, r10, r12 ; qs1 - - qsub8 r12, r9, r8 ; vp9_signed_char_clamp(vp9_filter + 3 * ( qs0 - ps0)) - str r7, [sp, #12] ; store ps1 temporarily - qsub8 r7, r7, r10 ; vp9_signed_char_clamp(ps1-qs1) - str r10, [sp, #8] ; store qs1 temporarily - qadd8 r7, r7, r12 - str r9, [sp] ; store qs0 temporarily - qadd8 r7, r7, r12 - str r8, [sp, #4] ; store ps0 temporarily - qadd8 r7, r7, r12 ; vp9_filter: r7 - - ldr r10, c0x03030303 ; r10 = 3 --modified for vp8 - ldr r9, c0x04040404 - - and r7, r7, lr ; vp9_filter &= mask (lr is free) - - mov r12, r7 ; Filter2: r12 - and r12, r12, r6 ; Filter2 &= hev - - ;modify code for vp8 - ;save bottom 3 bits so that we round one side +4 and the other +3 - qadd8 r8 , r12 , r9 ; Filter1 (r8) = vp9_signed_char_clamp(Filter2+4) - qadd8 r12 , r12 , r10 ; Filter2 (r12) = vp9_signed_char_clamp(Filter2+3) - - mov r10, #0 - shadd8 r8 , r8 , r10 ; Filter1 >>= 3 - shadd8 r12 , r12 , r10 ; Filter2 >>= 3 - shadd8 r8 , r8 , r10 - shadd8 r12 , r12 , r10 - shadd8 r8 , r8 , r10 ; r8: Filter1 - shadd8 r12 , r12 , r10 ; r12: Filter2 - - ldr r9, [sp] ; load qs0 - ldr r11, [sp, #4] ; load ps0 - - qsub8 r9 , r9, r8 ; qs0 = vp9_signed_char_clamp(qs0 - Filter1) - qadd8 r11, r11, r12 ; ps0 = vp9_signed_char_clamp(ps0 + Filter2) - - ;save bottom 3 bits so that we round one side +4 and the other +3 - ;and r8, r12, r10 ; s = Filter2 & 7 (s: r8) - ;qadd8 r12 , r12 , r9 ; Filter2 = vp9_signed_char_clamp(Filter2+4) - ;mov r10, #0 - ;shadd8 r12 , r12 , r10 ; Filter2 >>= 3 - ;usub8 lr, r8, r9 ; s = (s==4)*-1 - ;sel lr, r11, r10 - ;shadd8 r12 , r12 , r10 - ;usub8 r8, r9, r8 - ;sel r8, r11, r10 - ;ldr r9, [sp] ; load qs0 - ;ldr r11, [sp, #4] ; load ps0 - ;shadd8 r12 , r12 , r10 - ;and r8, r8, lr ; -1 for each element that equals 4 - ;qadd8 r10, r8, r12 ; u = vp9_signed_char_clamp(s + Filter2) - ;qsub8 r9 , r9, r12 ; qs0 = vp9_signed_char_clamp(qs0 - Filter2) - ;qadd8 r11, r11, r10 ; ps0 = vp9_signed_char_clamp(ps0 + u) - - ;end of modification for vp8 - - bic r12, r7, r6 ; vp9_filter &= ~hev ( r6 is free) - ;mov r12, r7 - - ;roughly 3/7th difference across boundary - mov lr, #0x1b ; 27 - mov r7, #0x3f ; 63 - - sxtb16 r6, r12 - sxtb16 r10, r12, ror #8 - smlabb r8, r6, lr, r7 - smlatb r6, r6, lr, r7 - smlabb r7, r10, lr, r7 - smultb r10, r10, lr - ssat r8, #8, r8, asr #7 - ssat r6, #8, r6, asr #7 - add r10, r10, #63 - ssat r7, #8, r7, asr #7 - ssat r10, #8, r10, asr #7 - - ldr lr, c0x80808080 - - pkhbt r6, r8, r6, lsl #16 - pkhbt r10, r7, r10, lsl #16 - uxtb16 r6, r6 - uxtb16 r10, r10 - - sub src, src, pstep - - orr r10, r6, r10, lsl #8 ; u = vp9_signed_char_clamp((63 + Filter2 * 27)>>7) - - qsub8 r8, r9, r10 ; s = vp9_signed_char_clamp(qs0 - u) - qadd8 r10, r11, r10 ; s = vp9_signed_char_clamp(ps0 + u) - eor r8, r8, lr ; *oq0 = s^0x80 - str r8, [src] ; store *oq0 - sub src, src, pstep - eor r10, r10, lr ; *op0 = s^0x80 - str r10, [src] ; store *op0 - - ;roughly 2/7th difference across boundary - mov lr, #0x12 ; 18 - mov r7, #0x3f ; 63 - - sxtb16 r6, r12 - sxtb16 r10, r12, ror #8 - smlabb r8, r6, lr, r7 - smlatb r6, r6, lr, r7 - smlabb r9, r10, lr, r7 - smlatb r10, r10, lr, r7 - ssat r8, #8, r8, asr #7 - ssat r6, #8, r6, asr #7 - ssat r9, #8, r9, asr #7 - ssat r10, #8, r10, asr #7 - - ldr lr, c0x80808080 - - pkhbt r6, r8, r6, lsl #16 - pkhbt r10, r9, r10, lsl #16 - - ldr r9, [sp, #8] ; load qs1 - ldr r11, [sp, #12] ; load 
ps1 - - uxtb16 r6, r6 - uxtb16 r10, r10 - - sub src, src, pstep - - orr r10, r6, r10, lsl #8 ; u = vp9_signed_char_clamp((63 + Filter2 * 18)>>7) - - qadd8 r11, r11, r10 ; s = vp9_signed_char_clamp(ps1 + u) - qsub8 r8, r9, r10 ; s = vp9_signed_char_clamp(qs1 - u) - eor r11, r11, lr ; *op1 = s^0x80 - str r11, [src], pstep ; store *op1 - eor r8, r8, lr ; *oq1 = s^0x80 - add src, src, pstep, lsl #1 - - mov r7, #0x3f ; 63 - - str r8, [src], pstep ; store *oq1 - - ;roughly 1/7th difference across boundary - mov lr, #0x9 ; 9 - ldr r9, [src] ; load q2 - - sxtb16 r6, r12 - sxtb16 r10, r12, ror #8 - smlabb r8, r6, lr, r7 - smlatb r6, r6, lr, r7 - smlabb r12, r10, lr, r7 - smlatb r10, r10, lr, r7 - ssat r8, #8, r8, asr #7 - ssat r6, #8, r6, asr #7 - ssat r12, #8, r12, asr #7 - ssat r10, #8, r10, asr #7 - - sub src, src, pstep, lsl #2 - - pkhbt r6, r8, r6, lsl #16 - pkhbt r10, r12, r10, lsl #16 - - sub src, src, pstep - ldr lr, c0x80808080 - - ldr r11, [src] ; load p2 - - uxtb16 r6, r6 - uxtb16 r10, r10 - - eor r9, r9, lr - eor r11, r11, lr - - orr r10, r6, r10, lsl #8 ; u = vp9_signed_char_clamp((63 + Filter2 * 9)>>7) - - qadd8 r8, r11, r10 ; s = vp9_signed_char_clamp(ps2 + u) - qsub8 r10, r9, r10 ; s = vp9_signed_char_clamp(qs2 - u) - eor r8, r8, lr ; *op2 = s^0x80 - str r8, [src], pstep, lsl #2 ; store *op2 - add src, src, pstep - eor r10, r10, lr ; *oq2 = s^0x80 - str r10, [src], pstep, lsl #1 ; store *oq2 - -|mbhskip_filter| - add src, src, #4 - sub src, src, pstep, lsl #3 - subs count, count, #1 - - ldrne r9, [src], pstep ; p3 - ldrne r10, [src], pstep ; p2 - ldrne r11, [src], pstep ; p1 - - bne MBHnext8 - - add sp, sp, #16 - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_mbloop_filter_horizontal_edge_armv6| - - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -|vp9_loop_filter_vertical_edge_armv6| PROC -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - stmdb sp!, {r4 - r11, lr} - - sub src, src, #4 ; move src pointer down by 4 - ldr count, [sp, #40] ; count for 8-in-parallel - ldr r12, [sp, #36] ; load thresh address - sub sp, sp, #16 ; create temp buffer - - ldr r6, [src], pstep ; load source data - ldrb r4, [r2] ; blimit - ldr r7, [src], pstep - ldrb r2, [r3] ; limit - ldr r8, [src], pstep - orr r4, r4, r4, lsl #8 - ldrb r3, [r12] ; thresh - orr r2, r2, r2, lsl #8 - ldr lr, [src], pstep - mov count, count, lsl #1 ; 4-in-parallel - orr r4, r4, r4, lsl #16 - orr r3, r3, r3, lsl #8 - orr r2, r2, r2, lsl #16 - orr r3, r3, r3, lsl #16 - -|Vnext8| - - ; vp9_filter_mask() function - ; calculate breakout conditions - ; transpose the source data for 4-in-parallel operation - TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12 - - uqsub8 r7, r9, r10 ; p3 - p2 - uqsub8 r8, r10, r9 ; p2 - p3 - uqsub8 r9, r10, r11 ; p2 - p1 - uqsub8 r10, r11, r10 ; p1 - p2 - orr r7, r7, r8 ; abs (p3-p2) - orr r10, r9, r10 ; abs (p2-p1) - uqsub8 lr, r7, r2 ; compare to limit. lr: vp9_filter_mask - uqsub8 r10, r10, r2 ; compare to limit - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - - orr lr, lr, r10 - - uqsub8 r6, r11, r12 ; p1 - p0 - uqsub8 r7, r12, r11 ; p0 - p1 - add src, src, #4 ; move src pointer up by 4 - orr r6, r6, r7 ; abs (p1-p0) - str r11, [sp, #12] ; save p1 - uqsub8 r10, r6, r2 ; compare to limit - uqsub8 r11, r6, r3 ; compare to thresh - orr lr, lr, r10 - - ; transpose uses 8 regs(r6 - r12 and lr). 
Need to save reg value now - ; transpose the source data for 4-in-parallel operation - ldr r6, [src], pstep ; load source data - str r11, [sp] ; push r11 to stack - ldr r7, [src], pstep - str r12, [sp, #4] ; save current reg before load q0 - q3 data - ldr r8, [src], pstep - str lr, [sp, #8] - ldr lr, [src], pstep - - TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12 - - ldr lr, [sp, #8] ; load back (f)limit accumulator - - uqsub8 r6, r12, r11 ; q3 - q2 - uqsub8 r7, r11, r12 ; q2 - q3 - uqsub8 r12, r11, r10 ; q2 - q1 - uqsub8 r11, r10, r11 ; q1 - q2 - orr r6, r6, r7 ; abs (q3-q2) - orr r7, r12, r11 ; abs (q2-q1) - uqsub8 r6, r6, r2 ; compare to limit - uqsub8 r7, r7, r2 ; compare to limit - ldr r11, [sp, #4] ; load back p0 - ldr r12, [sp, #12] ; load back p1 - orr lr, lr, r6 - orr lr, lr, r7 - - uqsub8 r6, r11, r9 ; p0 - q0 - uqsub8 r7, r9, r11 ; q0 - p0 - uqsub8 r8, r12, r10 ; p1 - q1 - uqsub8 r11, r10, r12 ; q1 - p1 - orr r6, r6, r7 ; abs (p0-q0) - ldr r7, c0x7F7F7F7F - orr r8, r8, r11 ; abs (p1-q1) - uqadd8 r6, r6, r6 ; abs (p0-q0) * 2 - and r8, r7, r8, lsr #1 ; abs (p1-q1) / 2 - uqsub8 r11, r10, r9 ; q1 - q0 - uqadd8 r6, r8, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 - uqsub8 r12, r9, r10 ; q0 - q1 - uqsub8 r6, r6, r4 ; compare to flimit - - orr r9, r11, r12 ; abs (q1-q0) - uqsub8 r8, r9, r2 ; compare to limit - uqsub8 r10, r9, r3 ; compare to thresh - orr lr, lr, r6 - orr lr, lr, r8 - - mvn r11, #0 ; r11 == -1 - mov r12, #0 - - usub8 lr, r12, lr - ldr r9, [sp] ; load the compared result - sel lr, r11, r12 ; filter mask: lr - - cmp lr, #0 - beq vskip_filter ; skip filtering - - ;vp8_hevmask() function - ;calculate high edge variance - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - - orr r9, r9, r10 - - ldrh r7, [src, #-2] - ldrh r8, [src], pstep - - usub8 r9, r12, r9 - sel r6, r12, r11 ; hev mask: r6 - - ;vp9_filter() function - ; load soure data to r6, r11, r12, lr - ldrh r9, [src, #-2] - ldrh r10, [src], pstep - - pkhbt r12, r7, r8, lsl #16 - - ldrh r7, [src, #-2] - ldrh r8, [src], pstep - - pkhbt r11, r9, r10, lsl #16 - - ldrh r9, [src, #-2] - ldrh r10, [src], pstep - - ; Transpose needs 8 regs(r6 - r12, and lr). 
Save r6 and lr first - str r6, [sp] - str lr, [sp, #4] - - pkhbt r6, r7, r8, lsl #16 - pkhbt lr, r9, r10, lsl #16 - - ;transpose r12, r11, r6, lr to r7, r8, r9, r10 - TRANSPOSE_MATRIX r12, r11, r6, lr, r7, r8, r9, r10 - - ;load back hev_mask r6 and filter_mask lr - ldr r12, c0x80808080 - ldr r6, [sp] - ldr lr, [sp, #4] - - eor r7, r7, r12 ; p1 offset to convert to a signed value - eor r8, r8, r12 ; p0 offset to convert to a signed value - eor r9, r9, r12 ; q0 offset to convert to a signed value - eor r10, r10, r12 ; q1 offset to convert to a signed value - - str r9, [sp] ; store qs0 temporarily - str r8, [sp, #4] ; store ps0 temporarily - str r10, [sp, #8] ; store qs1 temporarily - str r7, [sp, #12] ; store ps1 temporarily - - qsub8 r7, r7, r10 ; vp9_signed_char_clamp(ps1-qs1) - qsub8 r8, r9, r8 ; vp9_signed_char_clamp(vp9_filter + 3 * ( qs0 - ps0)) - - and r7, r7, r6 ; vp9_filter (r7) &= hev (r7 : filter) - - qadd8 r7, r7, r8 - ldr r9, c0x03030303 ; r9 = 3 --modified for vp8 - - qadd8 r7, r7, r8 - ldr r10, c0x04040404 - - qadd8 r7, r7, r8 - ;mvn r11, #0 ; r11 == -1 - - and r7, r7, lr ; vp9_filter &= mask - - ;modify code for vp8 -- Filter1 = vp9_filter (r7) - qadd8 r8 , r7 , r9 ; Filter2 (r8) = vp9_signed_char_clamp(vp9_filter+3) - qadd8 r7 , r7 , r10 ; vp9_filter = vp9_signed_char_clamp(vp9_filter+4) - - mov r9, #0 - shadd8 r8 , r8 , r9 ; Filter2 >>= 3 - shadd8 r7 , r7 , r9 ; vp9_filter >>= 3 - shadd8 r8 , r8 , r9 - shadd8 r7 , r7 , r9 - shadd8 lr , r8 , r9 ; lr: filter2 - shadd8 r7 , r7 , r9 ; r7: filter - - ;usub8 lr, r8, r10 ; s = (s==4)*-1 - ;sel lr, r11, r9 - ;usub8 r8, r10, r8 - ;sel r8, r11, r9 - ;and r8, r8, lr ; -1 for each element that equals 4 -- r8: s - - ;calculate output - ;qadd8 lr, r8, r7 ; u = vp9_signed_char_clamp(s + vp9_filter) - - ldr r8, [sp] ; load qs0 - ldr r9, [sp, #4] ; load ps0 - - ldr r10, c0x01010101 - - qsub8 r8, r8, r7 ; u = vp9_signed_char_clamp(qs0 - vp9_filter) - qadd8 r9, r9, lr ; u = vp9_signed_char_clamp(ps0 + Filter2) - ;end of modification for vp8 - - eor r8, r8, r12 - eor r9, r9, r12 - - mov lr, #0 - - sadd8 r7, r7, r10 - shadd8 r7, r7, lr - - ldr r10, [sp, #8] ; load qs1 - ldr r11, [sp, #12] ; load ps1 - - bic r7, r7, r6 ; r7: vp9_filter - - qsub8 r10 , r10, r7 ; u = vp9_signed_char_clamp(qs1 - vp9_filter) - qadd8 r11, r11, r7 ; u = vp9_signed_char_clamp(ps1 + vp9_filter) - eor r10, r10, r12 - eor r11, r11, r12 - - sub src, src, pstep, lsl #2 - - ;we can use TRANSPOSE_MATRIX macro to transpose output - input: q1, q0, p0, p1 - ;output is b0, b1, b2, b3 - ;b0: 03 02 01 00 - ;b1: 13 12 11 10 - ;b2: 23 22 21 20 - ;b3: 33 32 31 30 - ; p1 p0 q0 q1 - ; (a3 a2 a1 a0) - TRANSPOSE_MATRIX r11, r9, r8, r10, r6, r7, r12, lr - - strh r6, [src, #-2] ; store the result - mov r6, r6, lsr #16 - strh r6, [src], pstep - - strh r7, [src, #-2] - mov r7, r7, lsr #16 - strh r7, [src], pstep - - strh r12, [src, #-2] - mov r12, r12, lsr #16 - strh r12, [src], pstep - - strh lr, [src, #-2] - mov lr, lr, lsr #16 - strh lr, [src], pstep - -|vskip_filter| - sub src, src, #4 - subs count, count, #1 - - ldrne r6, [src], pstep ; load source data - ldrne r7, [src], pstep - ldrne r8, [src], pstep - ldrne lr, [src], pstep - - bne Vnext8 - - add sp, sp, #16 - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_loop_filter_vertical_edge_armv6| - - - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -|vp8_mbloop_filter_vertical_edge_armv6| PROC -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - stmdb sp!, {r4 - r11, lr} - - sub src, 
src, #4 ; move src pointer down by 4 - ldr count, [sp, #40] ; count for 8-in-parallel - ldr r12, [sp, #36] ; load thresh address - pld [src, #23] ; preload for next block - sub sp, sp, #16 ; create temp buffer - - ldr r6, [src], pstep ; load source data - ldrb r4, [r2] ; blimit - pld [src, #23] - ldr r7, [src], pstep - ldrb r2, [r3] ; limit - pld [src, #23] - ldr r8, [src], pstep - orr r4, r4, r4, lsl #8 - ldrb r3, [r12] ; thresh - orr r2, r2, r2, lsl #8 - pld [src, #23] - ldr lr, [src], pstep - mov count, count, lsl #1 ; 4-in-parallel - orr r4, r4, r4, lsl #16 - orr r3, r3, r3, lsl #8 - orr r2, r2, r2, lsl #16 - orr r3, r3, r3, lsl #16 - -|MBVnext8| - ; vp9_filter_mask() function - ; calculate breakout conditions - ; transpose the source data for 4-in-parallel operation - TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12 - - uqsub8 r7, r9, r10 ; p3 - p2 - uqsub8 r8, r10, r9 ; p2 - p3 - uqsub8 r9, r10, r11 ; p2 - p1 - uqsub8 r10, r11, r10 ; p1 - p2 - orr r7, r7, r8 ; abs (p3-p2) - orr r10, r9, r10 ; abs (p2-p1) - uqsub8 lr, r7, r2 ; compare to limit. lr: vp9_filter_mask - uqsub8 r10, r10, r2 ; compare to limit - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - - orr lr, lr, r10 - - uqsub8 r6, r11, r12 ; p1 - p0 - uqsub8 r7, r12, r11 ; p0 - p1 - add src, src, #4 ; move src pointer up by 4 - orr r6, r6, r7 ; abs (p1-p0) - str r11, [sp, #12] ; save p1 - uqsub8 r10, r6, r2 ; compare to limit - uqsub8 r11, r6, r3 ; compare to thresh - orr lr, lr, r10 - - ; transpose uses 8 regs(r6 - r12 and lr). Need to save reg value now - ; transpose the source data for 4-in-parallel operation - ldr r6, [src], pstep ; load source data - str r11, [sp] ; push r11 to stack - ldr r7, [src], pstep - str r12, [sp, #4] ; save current reg before load q0 - q3 data - ldr r8, [src], pstep - str lr, [sp, #8] - ldr lr, [src], pstep - - - TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12 - - ldr lr, [sp, #8] ; load back (f)limit accumulator - - uqsub8 r6, r12, r11 ; q3 - q2 - uqsub8 r7, r11, r12 ; q2 - q3 - uqsub8 r12, r11, r10 ; q2 - q1 - uqsub8 r11, r10, r11 ; q1 - q2 - orr r6, r6, r7 ; abs (q3-q2) - orr r7, r12, r11 ; abs (q2-q1) - uqsub8 r6, r6, r2 ; compare to limit - uqsub8 r7, r7, r2 ; compare to limit - ldr r11, [sp, #4] ; load back p0 - ldr r12, [sp, #12] ; load back p1 - orr lr, lr, r6 - orr lr, lr, r7 - - uqsub8 r6, r11, r9 ; p0 - q0 - uqsub8 r7, r9, r11 ; q0 - p0 - uqsub8 r8, r12, r10 ; p1 - q1 - uqsub8 r11, r10, r12 ; q1 - p1 - orr r6, r6, r7 ; abs (p0-q0) - ldr r7, c0x7F7F7F7F - orr r8, r8, r11 ; abs (p1-q1) - uqadd8 r6, r6, r6 ; abs (p0-q0) * 2 - and r8, r7, r8, lsr #1 ; abs (p1-q1) / 2 - uqsub8 r11, r10, r9 ; q1 - q0 - uqadd8 r6, r8, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 - uqsub8 r12, r9, r10 ; q0 - q1 - uqsub8 r6, r6, r4 ; compare to flimit - - orr r9, r11, r12 ; abs (q1-q0) - uqsub8 r8, r9, r2 ; compare to limit - uqsub8 r10, r9, r3 ; compare to thresh - orr lr, lr, r6 - orr lr, lr, r8 - - mvn r11, #0 ; r11 == -1 - mov r12, #0 - - usub8 lr, r12, lr - ldr r9, [sp] ; load the compared result - sel lr, r11, r12 ; filter mask: lr - - cmp lr, #0 - beq mbvskip_filter ; skip filtering - - - - ;vp8_hevmask() function - ;calculate high edge variance - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - - orr r9, r9, r10 - - ldrh r7, [src, #-2] - ldrh r8, [src], pstep - - usub8 r9, r12, r9 - sel r6, r12, r11 ; hev mask: r6 - - - ; vp8_mbfilter() function - ; p2, q2 are only needed at the end. Don't need to load them in now. - ; Transpose needs 8 regs(r6 - r12, and lr). 
Save r6 and lr first - ; load soure data to r6, r11, r12, lr - ldrh r9, [src, #-2] - ldrh r10, [src], pstep - - pkhbt r12, r7, r8, lsl #16 - - ldrh r7, [src, #-2] - ldrh r8, [src], pstep - - pkhbt r11, r9, r10, lsl #16 - - ldrh r9, [src, #-2] - ldrh r10, [src], pstep - - str r6, [sp] ; save r6 - str lr, [sp, #4] ; save lr - - pkhbt r6, r7, r8, lsl #16 - pkhbt lr, r9, r10, lsl #16 - - ;transpose r12, r11, r6, lr to p1, p0, q0, q1 - TRANSPOSE_MATRIX r12, r11, r6, lr, r7, r8, r9, r10 - - ;load back hev_mask r6 and filter_mask lr - ldr r12, c0x80808080 - ldr r6, [sp] - ldr lr, [sp, #4] - - eor r7, r7, r12 ; ps1 - eor r8, r8, r12 ; ps0 - eor r9, r9, r12 ; qs0 - eor r10, r10, r12 ; qs1 - - qsub8 r12, r9, r8 ; vp9_signed_char_clamp(vp9_filter + 3 * ( qs0 - ps0)) - str r7, [sp, #12] ; store ps1 temporarily - qsub8 r7, r7, r10 ; vp9_signed_char_clamp(ps1-qs1) - str r10, [sp, #8] ; store qs1 temporarily - qadd8 r7, r7, r12 - str r9, [sp] ; store qs0 temporarily - qadd8 r7, r7, r12 - str r8, [sp, #4] ; store ps0 temporarily - qadd8 r7, r7, r12 ; vp9_filter: r7 - - ldr r10, c0x03030303 ; r10 = 3 --modified for vp8 - ldr r9, c0x04040404 - ;mvn r11, #0 ; r11 == -1 - - and r7, r7, lr ; vp9_filter &= mask (lr is free) - - mov r12, r7 ; Filter2: r12 - and r12, r12, r6 ; Filter2 &= hev - - ;modify code for vp8 - ;save bottom 3 bits so that we round one side +4 and the other +3 - qadd8 r8 , r12 , r9 ; Filter1 (r8) = vp9_signed_char_clamp(Filter2+4) - qadd8 r12 , r12 , r10 ; Filter2 (r12) = vp9_signed_char_clamp(Filter2+3) - - mov r10, #0 - shadd8 r8 , r8 , r10 ; Filter1 >>= 3 - shadd8 r12 , r12 , r10 ; Filter2 >>= 3 - shadd8 r8 , r8 , r10 - shadd8 r12 , r12 , r10 - shadd8 r8 , r8 , r10 ; r8: Filter1 - shadd8 r12 , r12 , r10 ; r12: Filter2 - - ldr r9, [sp] ; load qs0 - ldr r11, [sp, #4] ; load ps0 - - qsub8 r9 , r9, r8 ; qs0 = vp9_signed_char_clamp(qs0 - Filter1) - qadd8 r11, r11, r12 ; ps0 = vp9_signed_char_clamp(ps0 + Filter2) - - ;save bottom 3 bits so that we round one side +4 and the other +3 - ;and r8, r12, r10 ; s = Filter2 & 7 (s: r8) - ;qadd8 r12 , r12 , r9 ; Filter2 = vp9_signed_char_clamp(Filter2+4) - ;mov r10, #0 - ;shadd8 r12 , r12 , r10 ; Filter2 >>= 3 - ;usub8 lr, r8, r9 ; s = (s==4)*-1 - ;sel lr, r11, r10 - ;shadd8 r12 , r12 , r10 - ;usub8 r8, r9, r8 - ;sel r8, r11, r10 - ;ldr r9, [sp] ; load qs0 - ;ldr r11, [sp, #4] ; load ps0 - ;shadd8 r12 , r12 , r10 - ;and r8, r8, lr ; -1 for each element that equals 4 - ;qadd8 r10, r8, r12 ; u = vp9_signed_char_clamp(s + Filter2) - ;qsub8 r9 , r9, r12 ; qs0 = vp9_signed_char_clamp(qs0 - Filter2) - ;qadd8 r11, r11, r10 ; ps0 = vp9_signed_char_clamp(ps0 + u) - - ;end of modification for vp8 - - bic r12, r7, r6 ;vp9_filter &= ~hev ( r6 is free) - ;mov r12, r7 - - ;roughly 3/7th difference across boundary - mov lr, #0x1b ; 27 - mov r7, #0x3f ; 63 - - sxtb16 r6, r12 - sxtb16 r10, r12, ror #8 - smlabb r8, r6, lr, r7 - smlatb r6, r6, lr, r7 - smlabb r7, r10, lr, r7 - smultb r10, r10, lr - ssat r8, #8, r8, asr #7 - ssat r6, #8, r6, asr #7 - add r10, r10, #63 - ssat r7, #8, r7, asr #7 - ssat r10, #8, r10, asr #7 - - ldr lr, c0x80808080 - - pkhbt r6, r8, r6, lsl #16 - pkhbt r10, r7, r10, lsl #16 - uxtb16 r6, r6 - uxtb16 r10, r10 - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - - orr r10, r6, r10, lsl #8 ; u = vp9_signed_char_clamp((63 + Filter2 * 27)>>7) - - qsub8 r8, r9, r10 ; s = vp9_signed_char_clamp(qs0 - u) - qadd8 r10, r11, r10 ; s = vp9_signed_char_clamp(ps0 + u) - eor r8, r8, lr ; *oq0 = s^0x80 - eor r10, r10, lr ; *op0 = s^0x80 - - strb 
r10, [src, #-1] ; store op0 result - strb r8, [src], pstep ; store oq0 result - mov r10, r10, lsr #8 - mov r8, r8, lsr #8 - strb r10, [src, #-1] - strb r8, [src], pstep - mov r10, r10, lsr #8 - mov r8, r8, lsr #8 - strb r10, [src, #-1] - strb r8, [src], pstep - mov r10, r10, lsr #8 - mov r8, r8, lsr #8 - strb r10, [src, #-1] - strb r8, [src], pstep - - ;roughly 2/7th difference across boundary - mov lr, #0x12 ; 18 - mov r7, #0x3f ; 63 - - sxtb16 r6, r12 - sxtb16 r10, r12, ror #8 - smlabb r8, r6, lr, r7 - smlatb r6, r6, lr, r7 - smlabb r9, r10, lr, r7 - - smlatb r10, r10, lr, r7 - ssat r8, #8, r8, asr #7 - ssat r6, #8, r6, asr #7 - ssat r9, #8, r9, asr #7 - ssat r10, #8, r10, asr #7 - - sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines - - pkhbt r6, r8, r6, lsl #16 - pkhbt r10, r9, r10, lsl #16 - - ldr r9, [sp, #8] ; load qs1 - ldr r11, [sp, #12] ; load ps1 - ldr lr, c0x80808080 - - uxtb16 r6, r6 - uxtb16 r10, r10 - - add src, src, #2 - - orr r10, r6, r10, lsl #8 ; u = vp9_signed_char_clamp((63 + Filter2 * 18)>>7) - - qsub8 r8, r9, r10 ; s = vp9_signed_char_clamp(qs1 - u) - qadd8 r10, r11, r10 ; s = vp9_signed_char_clamp(ps1 + u) - eor r8, r8, lr ; *oq1 = s^0x80 - eor r10, r10, lr ; *op1 = s^0x80 - - ldrb r11, [src, #-5] ; load p2 for 1/7th difference across boundary - strb r10, [src, #-4] ; store op1 - strb r8, [src, #-1] ; store oq1 - ldrb r9, [src], pstep ; load q2 for 1/7th difference across boundary - - mov r10, r10, lsr #8 - mov r8, r8, lsr #8 - - ldrb r6, [src, #-5] - strb r10, [src, #-4] - strb r8, [src, #-1] - ldrb r7, [src], pstep - - mov r10, r10, lsr #8 - mov r8, r8, lsr #8 - orr r11, r11, r6, lsl #8 - orr r9, r9, r7, lsl #8 - - ldrb r6, [src, #-5] - strb r10, [src, #-4] - strb r8, [src, #-1] - ldrb r7, [src], pstep - - mov r10, r10, lsr #8 - mov r8, r8, lsr #8 - orr r11, r11, r6, lsl #16 - orr r9, r9, r7, lsl #16 - - ldrb r6, [src, #-5] - strb r10, [src, #-4] - strb r8, [src, #-1] - ldrb r7, [src], pstep - orr r11, r11, r6, lsl #24 - orr r9, r9, r7, lsl #24 - - ;roughly 1/7th difference across boundary - eor r9, r9, lr - eor r11, r11, lr - - mov lr, #0x9 ; 9 - mov r7, #0x3f ; 63 - - sxtb16 r6, r12 - sxtb16 r10, r12, ror #8 - smlabb r8, r6, lr, r7 - smlatb r6, r6, lr, r7 - smlabb r12, r10, lr, r7 - smlatb r10, r10, lr, r7 - ssat r8, #8, r8, asr #7 - ssat r6, #8, r6, asr #7 - ssat r12, #8, r12, asr #7 - ssat r10, #8, r10, asr #7 - - sub src, src, pstep, lsl #2 - - pkhbt r6, r8, r6, lsl #16 - pkhbt r10, r12, r10, lsl #16 - - uxtb16 r6, r6 - uxtb16 r10, r10 - - ldr lr, c0x80808080 - - orr r10, r6, r10, lsl #8 ; u = vp9_signed_char_clamp((63 + Filter2 * 9)>>7) - - qadd8 r8, r11, r10 ; s = vp9_signed_char_clamp(ps2 + u) - qsub8 r10, r9, r10 ; s = vp9_signed_char_clamp(qs2 - u) - eor r8, r8, lr ; *op2 = s^0x80 - eor r10, r10, lr ; *oq2 = s^0x80 - - strb r8, [src, #-5] ; store *op2 - strb r10, [src], pstep ; store *oq2 - mov r8, r8, lsr #8 - mov r10, r10, lsr #8 - strb r8, [src, #-5] - strb r10, [src], pstep - mov r8, r8, lsr #8 - mov r10, r10, lsr #8 - strb r8, [src, #-5] - strb r10, [src], pstep - mov r8, r8, lsr #8 - mov r10, r10, lsr #8 - strb r8, [src, #-5] - strb r10, [src], pstep - - ;adjust src pointer for next loop - sub src, src, #2 - -|mbvskip_filter| - sub src, src, #4 - subs count, count, #1 - - pld [src, #23] ; preload for next block - ldrne r6, [src], pstep ; load source data - pld [src, #23] - ldrne r7, [src], pstep - pld [src, #23] - ldrne r8, [src], pstep - pld [src, #23] - ldrne lr, [src], pstep - - bne MBVnext8 - - add sp, sp, #16 - - ldmia sp!, {r4 - 
r11, pc} - ENDP ; |vp8_mbloop_filter_vertical_edge_armv6| - -; Constant Pool -c0x80808080 DCD 0x80808080 -c0x03030303 DCD 0x03030303 -c0x04040404 DCD 0x04040404 -c0x01010101 DCD 0x01010101 -c0x7F7F7F7F DCD 0x7F7F7F7F - - END diff --git a/vp9/common/arm/armv6/vp9_recon_v6.asm b/vp9/common/arm/armv6/vp9_recon_v6.asm deleted file mode 100644 index 99c7bcf2d..000000000 --- a/vp9/common/arm/armv6/vp9_recon_v6.asm +++ /dev/null @@ -1,281 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_recon_b_armv6| - EXPORT |vp8_recon2b_armv6| - EXPORT |vp8_recon4b_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code -prd RN r0 -dif RN r1 -dst RN r2 -stride RN r3 - -;void recon_b(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride) -; R0 char* pred_ptr -; R1 short * dif_ptr -; R2 char * dst_ptr -; R3 int stride - -; Description: -; Loop through the block adding the Pred and Diff together. Clamp and then -; store back into the Dst. - -; Restrictions : -; all buffers are expected to be 4 byte aligned coming in and -; going out. -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; -; -; -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp8_recon_b_armv6| PROC - stmdb sp!, {r4 - r9, lr} - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - add dif, dif, #32 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 -;; ldr r6, [dif, #8] ; 1 | 0 -;; ldr r7, [dif, #12] ; 3 | 2 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - add dif, dif, #32 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 -;; ldr r6, [dif, #16] ; 1 | 0 -;; ldr r7, [dif, #20] ; 3 | 2 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - add dif, dif, #32 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ;0, 1, 2, 3 - ldr r4, [prd], #16 ; 3 | 2 | 1 | 0 -;; ldr r6, [dif, #24] ; 1 | 0 -;; ldr r7, [dif, #28] ; 3 | 2 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst], stride - - ldmia sp!, {r4 - r9, pc} - - ENDP ; |recon_b| - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; -; -; 
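The recon helpers in this file (vp8_recon_b_armv6 above, the recon4b/recon2b variants below) all reduce to one per-pixel operation: add the 16-bit residual to the 8-bit prediction and saturate to [0, 255]. The assembly does four pixels per iteration, de-interleaving with pkhbt/pkhtb, adding with uxtab16 and clamping with usat16. A scalar sketch, with the strides parameterized (an assumption for illustration; the hard-coded #16 and #32 row steps above come from the fixed prediction and diff buffer layouts):

#include <stdint.h>

static uint8_t clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* width x height block: dst = clamp(pred + diff), row by row. */
static void recon_sketch(const uint8_t *pred, int pred_stride,
                         const int16_t *diff, int diff_stride,
                         uint8_t *dst, int dst_stride,
                         int width, int height) {
  for (int r = 0; r < height; ++r) {
    for (int c = 0; c < width; ++c)
      dst[c] = clamp255(pred[c] + diff[c]);
    pred += pred_stride;
    diff += diff_stride;
    dst  += dst_stride;
  }
}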
-;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; R0 char *pred_ptr -; R1 short *dif_ptr -; R2 char *dst_ptr -; R3 int stride -|vp8_recon4b_armv6| PROC - stmdb sp!, {r4 - r9, lr} - - mov lr, #4 - -recon4b_loop - ;0, 1, 2, 3 - ldr r4, [prd], #4 ; 3 | 2 | 1 | 0 - ldr r6, [dif, #0] ; 1 | 0 - ldr r7, [dif, #4] ; 3 | 2 - - pkhbt r8, r6, r7, lsl #16 ; 2 | 0 - pkhtb r9, r7, r6, asr #16 ; 3 | 1 - - uxtab16 r8, r8, r4 ; 2 | 0 + 3 | 2 | 2 | 0 - uxtab16 r9, r9, r4, ror #8 ; 3 | 1 + 0 | 3 | 2 | 1 - - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst] - - ;4, 5, 6, 7 - ldr r4, [prd], #4 -;; ldr r6, [dif, #32] -;; ldr r7, [dif, #36] - ldr r6, [dif, #8] - ldr r7, [dif, #12] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #4] - - ;8, 9, 10, 11 - ldr r4, [prd], #4 -;; ldr r6, [dif, #64] -;; ldr r7, [dif, #68] - ldr r6, [dif, #16] - ldr r7, [dif, #20] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #8] - - ;12, 13, 14, 15 - ldr r4, [prd], #4 -;; ldr r6, [dif, #96] -;; ldr r7, [dif, #100] - ldr r6, [dif, #24] - ldr r7, [dif, #28] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #12] - - add dst, dst, stride -;; add dif, dif, #8 - add dif, dif, #32 - - subs lr, lr, #1 - bne recon4b_loop - - ldmia sp!, {r4 - r9, pc} - - ENDP ; |Recon4B| - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; -; -; -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -; R0 char *pred_ptr -; R1 short *dif_ptr -; R2 char *dst_ptr -; R3 int stride -|vp8_recon2b_armv6| PROC - stmdb sp!, {r4 - r9, lr} - - mov lr, #4 - -recon2b_loop - ;0, 1, 2, 3 - ldr r4, [prd], #4 - ldr r6, [dif, #0] - ldr r7, [dif, #4] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst] - - ;4, 5, 6, 7 - ldr r4, [prd], #4 -;; ldr r6, [dif, #32] -;; ldr r7, [dif, #36] - ldr r6, [dif, #8] - ldr r7, [dif, #12] - - pkhbt r8, r6, r7, lsl #16 - pkhtb r9, r7, r6, asr #16 - - uxtab16 r8, r8, r4 - uxtab16 r9, r9, r4, ror #8 - usat16 r8, #8, r8 - usat16 r9, #8, r9 - orr r8, r8, r9, lsl #8 - - str r8, [dst, #4] - - add dst, dst, stride -;; add dif, dif, #8 - add dif, dif, #16 - - subs lr, lr, #1 - bne recon2b_loop - - ldmia sp!, {r4 - r9, pc} - - ENDP ; |Recon2B| - - END diff --git a/vp9/common/arm/armv6/vp9_simpleloopfilter_v6.asm b/vp9/common/arm/armv6/vp9_simpleloopfilter_v6.asm deleted file mode 100644 index 8306912be..000000000 --- a/vp9/common/arm/armv6/vp9_simpleloopfilter_v6.asm +++ /dev/null @@ -1,286 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_loop_filter_simple_horizontal_edge_armv6| - EXPORT |vp9_loop_filter_simple_vertical_edge_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code - - MACRO - TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3 - ; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3 - ; a0: 03 02 01 00 - ; a1: 13 12 11 10 - ; a2: 23 22 21 20 - ; a3: 33 32 31 30 - ; b3 b2 b1 b0 - - uxtb16 $b1, $a1 ; xx 12 xx 10 - uxtb16 $b0, $a0 ; xx 02 xx 00 - uxtb16 $b3, $a3 ; xx 32 xx 30 - uxtb16 $b2, $a2 ; xx 22 xx 20 - orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00 - orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20 - - uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11 - uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31 - uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01 - uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21 - orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01 - orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21 - - pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1 - pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3 - - pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0 - pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2 - MEND - - - -src RN r0 -pstep RN r1 - -;r0 unsigned char *src_ptr, -;r1 int src_pixel_step, -;r2 const char *blimit - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -|vp9_loop_filter_simple_horizontal_edge_armv6| PROC -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - stmdb sp!, {r4 - r11, lr} - - ldrb r12, [r2] ; blimit - ldr r3, [src, -pstep, lsl #1] ; p1 - ldr r4, [src, -pstep] ; p0 - ldr r5, [src] ; q0 - ldr r6, [src, pstep] ; q1 - orr r12, r12, r12, lsl #8 ; blimit - ldr r2, c0x80808080 - orr r12, r12, r12, lsl #16 ; blimit - mov r9, #4 ; double the count. we're doing 4 at a time - mov lr, #0 ; need 0 in a couple places - -|simple_hnext8| - ; vp8_simple_filter_mask() - - uqsub8 r7, r3, r6 ; p1 - q1 - uqsub8 r8, r6, r3 ; q1 - p1 - uqsub8 r10, r4, r5 ; p0 - q0 - uqsub8 r11, r5, r4 ; q0 - p0 - orr r8, r8, r7 ; abs(p1 - q1) - orr r10, r10, r11 ; abs(p0 - q0) - uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2 - uhadd8 r8, r8, lr ; abs(p1 - q2) >> 1 - uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2 - mvn r8, #0 - usub8 r10, r12, r10 ; compare to flimit. 
usub8 sets GE flags - sel r10, r8, lr ; filter mask: F or 0 - cmp r10, #0 - beq simple_hskip_filter ; skip filtering if all masks are 0x00 - - ;vp8_simple_filter() - - eor r3, r3, r2 ; p1 offset to convert to a signed value - eor r6, r6, r2 ; q1 offset to convert to a signed value - eor r4, r4, r2 ; p0 offset to convert to a signed value - eor r5, r5, r2 ; q0 offset to convert to a signed value - - qsub8 r3, r3, r6 ; vp9_filter = p1 - q1 - qsub8 r6, r5, r4 ; q0 - p0 - qadd8 r3, r3, r6 ; += q0 - p0 - ldr r7, c0x04040404 - qadd8 r3, r3, r6 ; += q0 - p0 - ldr r8, c0x03030303 - qadd8 r3, r3, r6 ; vp9_filter = p1-q1 + 3*(q0-p0)) - ;STALL - and r3, r3, r10 ; vp9_filter &= mask - - qadd8 r7 , r3 , r7 ; Filter1 = vp9_filter + 4 - qadd8 r8 , r3 , r8 ; Filter2 = vp9_filter + 3 - - shadd8 r7 , r7 , lr - shadd8 r8 , r8 , lr - shadd8 r7 , r7 , lr - shadd8 r8 , r8 , lr - shadd8 r7 , r7 , lr ; Filter1 >>= 3 - shadd8 r8 , r8 , lr ; Filter2 >>= 3 - - qsub8 r5 ,r5, r7 ; u = q0 - Filter1 - qadd8 r4, r4, r8 ; u = p0 + Filter2 - eor r5, r5, r2 ; *oq0 = u^0x80 - str r5, [src] ; store oq0 result - eor r4, r4, r2 ; *op0 = u^0x80 - str r4, [src, -pstep] ; store op0 result - -|simple_hskip_filter| - subs r9, r9, #1 - addne src, src, #4 ; next row - - ldrne r3, [src, -pstep, lsl #1] ; p1 - ldrne r4, [src, -pstep] ; p0 - ldrne r5, [src] ; q0 - ldrne r6, [src, pstep] ; q1 - - bne simple_hnext8 - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_loop_filter_simple_horizontal_edge_armv6| - - -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -|vp9_loop_filter_simple_vertical_edge_armv6| PROC -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - stmdb sp!, {r4 - r11, lr} - - ldrb r12, [r2] ; r12: blimit - ldr r2, c0x80808080 - orr r12, r12, r12, lsl #8 - - ; load soure data to r7, r8, r9, r10 - ldrh r3, [src, #-2] - pld [src, #23] ; preload for next block - ldrh r4, [src], pstep - orr r12, r12, r12, lsl #16 - - ldrh r5, [src, #-2] - pld [src, #23] - ldrh r6, [src], pstep - - pkhbt r7, r3, r4, lsl #16 - - ldrh r3, [src, #-2] - pld [src, #23] - ldrh r4, [src], pstep - - pkhbt r8, r5, r6, lsl #16 - - ldrh r5, [src, #-2] - pld [src, #23] - ldrh r6, [src], pstep - mov r11, #4 ; double the count. 
we're doing 4 at a time - -|simple_vnext8| - ; vp8_simple_filter_mask() function - pkhbt r9, r3, r4, lsl #16 - pkhbt r10, r5, r6, lsl #16 - - ;transpose r7, r8, r9, r10 to r3, r4, r5, r6 - TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6 - - uqsub8 r7, r3, r6 ; p1 - q1 - uqsub8 r8, r6, r3 ; q1 - p1 - uqsub8 r9, r4, r5 ; p0 - q0 - uqsub8 r10, r5, r4 ; q0 - p0 - orr r7, r7, r8 ; abs(p1 - q1) - orr r9, r9, r10 ; abs(p0 - q0) - mov r8, #0 - uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2 - uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2 - uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2 - mvn r10, #0 ; r10 == -1 - - usub8 r7, r12, r7 ; compare to flimit - sel lr, r10, r8 ; filter mask - - cmp lr, #0 - beq simple_vskip_filter ; skip filtering - - ;vp8_simple_filter() function - eor r3, r3, r2 ; p1 offset to convert to a signed value - eor r6, r6, r2 ; q1 offset to convert to a signed value - eor r4, r4, r2 ; p0 offset to convert to a signed value - eor r5, r5, r2 ; q0 offset to convert to a signed value - - qsub8 r3, r3, r6 ; vp9_filter = p1 - q1 - qsub8 r6, r5, r4 ; q0 - p0 - - qadd8 r3, r3, r6 ; vp9_filter += q0 - p0 - ldr r9, c0x03030303 ; r9 = 3 - - qadd8 r3, r3, r6 ; vp9_filter += q0 - p0 - ldr r7, c0x04040404 - - qadd8 r3, r3, r6 ; vp9_filter = p1-q1 + 3*(q0-p0)) - ;STALL - and r3, r3, lr ; vp9_filter &= mask - - qadd8 r9 , r3 , r9 ; Filter2 = vp9_filter + 3 - qadd8 r3 , r3 , r7 ; Filter1 = vp9_filter + 4 - - shadd8 r9 , r9 , r8 - shadd8 r3 , r3 , r8 - shadd8 r9 , r9 , r8 - shadd8 r3 , r3 , r8 - shadd8 r9 , r9 , r8 ; Filter2 >>= 3 - shadd8 r3 , r3 , r8 ; Filter1 >>= 3 - - ;calculate output - sub src, src, pstep, lsl #2 - - qadd8 r4, r4, r9 ; u = p0 + Filter2 - qsub8 r5, r5, r3 ; u = q0 - Filter1 - eor r4, r4, r2 ; *op0 = u^0x80 - eor r5, r5, r2 ; *oq0 = u^0x80 - - strb r4, [src, #-1] ; store the result - mov r4, r4, lsr #8 - strb r5, [src], pstep - mov r5, r5, lsr #8 - - strb r4, [src, #-1] - mov r4, r4, lsr #8 - strb r5, [src], pstep - mov r5, r5, lsr #8 - - strb r4, [src, #-1] - mov r4, r4, lsr #8 - strb r5, [src], pstep - mov r5, r5, lsr #8 - - strb r4, [src, #-1] - strb r5, [src], pstep - -|simple_vskip_filter| - subs r11, r11, #1 - - ; load soure data to r7, r8, r9, r10 - ldrneh r3, [src, #-2] - pld [src, #23] ; preload for next block - ldrneh r4, [src], pstep - - ldrneh r5, [src, #-2] - pld [src, #23] - ldrneh r6, [src], pstep - - pkhbt r7, r3, r4, lsl #16 - - ldrneh r3, [src, #-2] - pld [src, #23] - ldrneh r4, [src], pstep - - pkhbt r8, r5, r6, lsl #16 - - ldrneh r5, [src, #-2] - pld [src, #23] - ldrneh r6, [src], pstep - - bne simple_vnext8 - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp9_loop_filter_simple_vertical_edge_armv6| - -; Constant Pool -c0x80808080 DCD 0x80808080 -c0x03030303 DCD 0x03030303 -c0x04040404 DCD 0x04040404 - - END diff --git a/vp9/common/arm/armv6/vp9_sixtappredict8x4_v6.asm b/vp9/common/arm/armv6/vp9_sixtappredict8x4_v6.asm deleted file mode 100644 index 5bf94e090..000000000 --- a/vp9/common/arm/armv6/vp9_sixtappredict8x4_v6.asm +++ /dev/null @@ -1,273 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
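The "simple" filter file removed above skips the inner-tap logic entirely: the only breakout test is the flatness check |p0 - q0| * 2 + |p1 - q1| / 2 <= blimit, and only the two pixels adjacent to the edge are adjusted. A scalar equivalent of the filtering step (mask is the 0 or -1 flag produced by the usub8/sel pair; signed_char_clamp as in the earlier sketch):

/* Simple loop filter: adjust only p0 and q0. */
static void simple_filter_sketch(signed char mask,
                                 unsigned char p1, unsigned char *p0,
                                 unsigned char *q0, unsigned char q1) {
  signed char ps1 = (signed char)(p1 ^ 0x80);
  signed char ps0 = (signed char)(*p0 ^ 0x80);
  signed char qs0 = (signed char)(*q0 ^ 0x80);
  signed char qs1 = (signed char)(q1 ^ 0x80);

  signed char filter = signed_char_clamp(ps1 - qs1);
  filter = signed_char_clamp(filter + 3 * (qs0 - ps0)); /* p1-q1 + 3*(q0-p0) */
  filter &= mask;

  signed char Filter1 = signed_char_clamp(filter + 4) >> 3;
  *q0 = (unsigned char)(signed_char_clamp(qs0 - Filter1) ^ 0x80);
  signed char Filter2 = signed_char_clamp(filter + 3) >> 3;
  *p0 = (unsigned char)(signed_char_clamp(ps0 + Filter2) ^ 0x80);
}

The vertical-edge version is the same math after a 4x4 byte transpose (the TRANSPOSE_MATRIX macro), so the filter itself always works on packed "rows" of four edges.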
-; - - - EXPORT |vp8_sixtap_predict8x4_armv6| - - AREA |.text|, CODE, READONLY ; name this block of code -;------------------------------------- -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack unsigned char *dst_ptr, -; stack int dst_pitch -;------------------------------------- -;note: In first pass, store the result in transpose(8linesx9columns) on stack. Temporary stack size is 184. -;Line width is 20 that is 9 short data plus 2 to make it 4bytes aligned. In second pass, load data from stack, -;and the result is stored in transpose. -|vp8_sixtap_predict8x4_armv6| PROC - stmdb sp!, {r4 - r11, lr} - str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - add lr, sp, #4 ;point to temporary buffer - beq skip_firstpass_filter - -;first-pass filter - adr r12, filter8_coeff - sub r0, r0, r1, lsl #1 - - add r3, r1, #10 ; preload next row - pld [r0, r3] - - add r2, r12, r2, lsl #4 ;calculate filter location - add r0, r0, #3 ;adjust src only for loading convenience - - ldr r3, [r2] ; load up packed filter coefficients - ldr r4, [r2, #4] - ldr r5, [r2, #8] - - mov r2, #0x90000 ; height=9 is top part of counter - - sub r1, r1, #8 - -|first_pass_hloop_v6| - ldrb r6, [r0, #-5] ; load source data - ldrb r7, [r0, #-4] - ldrb r8, [r0, #-3] - ldrb r9, [r0, #-2] - ldrb r10, [r0, #-1] - - orr r2, r2, #0x4 ; construct loop counter. width=8=4x2 - - pkhbt r6, r6, r7, lsl #16 ; r7 | r6 - pkhbt r7, r7, r8, lsl #16 ; r8 | r7 - - pkhbt r8, r8, r9, lsl #16 ; r9 | r8 - pkhbt r9, r9, r10, lsl #16 ; r10 | r9 - -|first_pass_wloop_v6| - smuad r11, r6, r3 ; vp9_filter[0], vp9_filter[1] - smuad r12, r7, r3 - - ldrb r6, [r0], #1 - - smlad r11, r8, r4, r11 ; vp9_filter[2], vp9_filter[3] - ldrb r7, [r0], #1 - smlad r12, r9, r4, r12 - - pkhbt r10, r10, r6, lsl #16 ; r10 | r9 - pkhbt r6, r6, r7, lsl #16 ; r11 | r10 - smlad r11, r10, r5, r11 ; vp9_filter[4], vp9_filter[5] - smlad r12, r6, r5, r12 - - sub r2, r2, #1 - - add r11, r11, #0x40 ; round_shift_and_clamp - tst r2, #0xff ; test loop counter - usat r11, #8, r11, asr #7 - add r12, r12, #0x40 - strh r11, [lr], #20 ; result is transposed and stored, which will be used in the second pass - usat r12, #8, r12, asr #7 - - strh r12, [lr], #20 - - movne r11, r6 - movne r12, r7 - - movne r6, r8 - movne r7, r9 - movne r8, r10 - movne r9, r11 - movne r10, r12 - - bne first_pass_wloop_v6 - - ;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines - ;;IF ARCHITECTURE=6 - ;pld [src, ppl] - ;;pld [src, r9] - ;;ENDIF - - subs r2, r2, #0x10000 - - sub lr, lr, #158 - - add r0, r0, r1 ; move to next input line - - add r11, r1, #18 ; preload next row.
adding back block width(=8), which is subtracted earlier - pld [r0, r11] - - bne first_pass_hloop_v6 - -;second pass filter -secondpass_filter - ldr r3, [sp], #4 ; load back yoffset - ldr r0, [sp, #216] ; load dst address from stack 180+36 - ldr r1, [sp, #220] ; load dst stride from stack 180+40 - - cmp r3, #0 - beq skip_secondpass_filter - - adr r12, filter8_coeff - add lr, r12, r3, lsl #4 ;calculate filter location - - mov r2, #0x00080000 - - ldr r3, [lr] ; load up packed filter coefficients - ldr r4, [lr, #4] - ldr r5, [lr, #8] - - pkhbt r12, r4, r3 ; pack the filter differently - pkhbt r11, r5, r4 - -second_pass_hloop_v6 - ldr r6, [sp] ; load the data - ldr r7, [sp, #4] - - orr r2, r2, #2 ; loop counter - -second_pass_wloop_v6 - smuad lr, r3, r6 ; apply filter - smulbt r10, r3, r6 - - ldr r8, [sp, #8] - - smlad lr, r4, r7, lr - smladx r10, r12, r7, r10 - - ldrh r9, [sp, #12] - - smlad lr, r5, r8, lr - smladx r10, r11, r8, r10 - - add sp, sp, #4 - smlatb r10, r5, r9, r10 - - sub r2, r2, #1 - - add lr, lr, #0x40 ; round_shift_and_clamp - tst r2, #0xff - usat lr, #8, lr, asr #7 - add r10, r10, #0x40 - strb lr, [r0], r1 ; the result is transposed back and stored - usat r10, #8, r10, asr #7 - - strb r10, [r0],r1 - - movne r6, r7 - movne r7, r8 - - bne second_pass_wloop_v6 - - subs r2, r2, #0x10000 - add sp, sp, #12 ; update src pointer for next loop (20-8) - sub r0, r0, r1, lsl #2 - add r0, r0, #1 - - bne second_pass_hloop_v6 - - add sp, sp, #20 - ldmia sp!, {r4 - r11, pc} - -;-------------------- -skip_firstpass_filter - sub r0, r0, r1, lsl #1 - sub r1, r1, #8 - mov r2, #9 - -skip_firstpass_hloop - ldrb r4, [r0], #1 ; load data - subs r2, r2, #1 - ldrb r5, [r0], #1 - strh r4, [lr], #20 ; store it to immediate buffer - ldrb r6, [r0], #1 ; load data - strh r5, [lr], #20 - ldrb r7, [r0], #1 - strh r6, [lr], #20 - ldrb r8, [r0], #1 - strh r7, [lr], #20 - ldrb r9, [r0], #1 - strh r8, [lr], #20 - ldrb r10, [r0], #1 - strh r9, [lr], #20 - ldrb r11, [r0], #1 - strh r10, [lr], #20 - add r0, r0, r1 ; move to next input line - strh r11, [lr], #20 - - sub lr, lr, #158 ; move over to next column - bne skip_firstpass_hloop - - b secondpass_filter - -;-------------------- -skip_secondpass_filter - mov r2, #8 - add sp, sp, #4 ;start from src[0] instead of src[-2] - -skip_secondpass_hloop - ldr r6, [sp], #4 - subs r2, r2, #1 - ldr r8, [sp], #4 - - mov r7, r6, lsr #16 ; unpack - strb r6, [r0], r1 - mov r9, r8, lsr #16 - strb r7, [r0], r1 - add sp, sp, #12 ; 20-8 - strb r8, [r0], r1 - strb r9, [r0], r1 - - sub r0, r0, r1, lsl #2 - add r0, r0, #1 - - bne skip_secondpass_hloop - - add sp, sp, #16 ; 180 - (160 +4) - - ldmia sp!, {r4 - r11, pc} - - ENDP - -;----------------- -;One word each is reserved. Label filter8_coeff can be used to access the data. -;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
-filter8_coeff - DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000 - DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000 - DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000 - DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000 - DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000 - DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000 - DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000 - DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000 - - ;DCD 0, 0, 128, 0, 0, 0 - ;DCD 0, -6, 123, 12, -1, 0 - ;DCD 2, -11, 108, 36, -8, 1 - ;DCD 0, -9, 93, 50, -6, 0 - ;DCD 3, -16, 77, 77, -16, 3 - ;DCD 0, -6, 50, 93, -9, 0 - ;DCD 1, -8, 36, 108, -11, 2 - ;DCD 0, -1, 12, 123, -6, 0 - - END diff --git a/vp9/common/arm/neon/vp9_bilinearpredict16x16_neon.asm b/vp9/common/arm/neon/vp9_bilinearpredict16x16_neon.asm deleted file mode 100644 index 2528be7c3..000000000 --- a/vp9/common/arm/neon/vp9_bilinearpredict16x16_neon.asm +++ /dev/null @@ -1,357 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_bilinear_predict16x16_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; r4 unsigned char *dst_ptr, -; stack(r5) int dst_pitch - -|vp8_bilinear_predict16x16_neon| PROC - push {r4-r5, lr} - - adr r12, bifilter16_coeff - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_bfilter16x16_only - - add r2, r12, r2, lsl #3 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - - vld1.s32 {d31}, [r2] ;load first_pass filter - - beq firstpass_bfilter16x16_only - - sub sp, sp, #272 ;reserve space on stack for temporary storage - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - mov lr, sp - vld1.u8 {d5, d6, d7}, [r0], r1 - - mov r2, #3 ;loop counter - vld1.u8 {d8, d9, d10}, [r0], r1 - - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vld1.u8 {d11, d12, d13}, [r0], r1 - - vdup.8 d1, d31[4] - -;First Pass: output_height lines x output_width columns (17x16) -filt_blk2d_fp16x16_loop_neon - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q8, d3, d0 - vmull.u8 q9, d5, d0 - vmull.u8 q10, d6, d0 - vmull.u8 q11, d8, d0 - vmull.u8 q12, d9, d0 - vmull.u8 q13, d11, d0 - vmull.u8 q14, d12, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - vext.8 d11, d11, d12, #1 - - vmlal.u8 q7, d2, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q9, d5, d1 - vmlal.u8 q11, d8, d1 - vmlal.u8 q13, d11, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - vext.8 d12, d12, d13, #1 - - vmlal.u8 q8, d3, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q10, d6, d1 - vmlal.u8 q12, d9, d1 - vmlal.u8 q14, d12, d1 - - subs r2, r2, #1 - - vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d15, q8, #7 - vqrshrn.u16 d16, q9, #7 - vqrshrn.u16 d17, q10, #7 - vqrshrn.u16 d18, q11, #7 - vqrshrn.u16 d19, q12, #7 - vqrshrn.u16 d20, q13, #7 - - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - vqrshrn.u16 
d21, q14, #7 - vld1.u8 {d5, d6, d7}, [r0], r1 - - vst1.u8 {d14, d15, d16, d17}, [lr]! ;store result - vld1.u8 {d8, d9, d10}, [r0], r1 - vst1.u8 {d18, d19, d20, d21}, [lr]! - vld1.u8 {d11, d12, d13}, [r0], r1 - - bne filt_blk2d_fp16x16_loop_neon - -;First-pass filtering for rest 5 lines - vld1.u8 {d14, d15, d16}, [r0], r1 - - vmull.u8 q9, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q10, d3, d0 - vmull.u8 q11, d5, d0 - vmull.u8 q12, d6, d0 - vmull.u8 q13, d8, d0 - vmull.u8 q14, d9, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - - vmlal.u8 q9, d2, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q11, d5, d1 - vmlal.u8 q13, d8, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - - vmlal.u8 q10, d3, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q12, d6, d1 - vmlal.u8 q14, d9, d1 - - vmull.u8 q1, d11, d0 - vmull.u8 q2, d12, d0 - vmull.u8 q3, d14, d0 - vmull.u8 q4, d15, d0 - - vext.8 d11, d11, d12, #1 ;construct src_ptr[1] - vext.8 d14, d14, d15, #1 - - vmlal.u8 q1, d11, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q3, d14, d1 - - vext.8 d12, d12, d13, #1 - vext.8 d15, d15, d16, #1 - - vmlal.u8 q2, d12, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q4, d15, d1 - - vqrshrn.u16 d10, q9, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d11, q10, #7 - vqrshrn.u16 d12, q11, #7 - vqrshrn.u16 d13, q12, #7 - vqrshrn.u16 d14, q13, #7 - vqrshrn.u16 d15, q14, #7 - vqrshrn.u16 d16, q1, #7 - vqrshrn.u16 d17, q2, #7 - vqrshrn.u16 d18, q3, #7 - vqrshrn.u16 d19, q4, #7 - - vst1.u8 {d10, d11, d12, d13}, [lr]! ;store result - vst1.u8 {d14, d15, d16, d17}, [lr]! - vst1.u8 {d18, d19}, [lr]! - -;Second pass: 16x16 -;secondpass_filter - add r3, r12, r3, lsl #3 - sub lr, lr, #272 - - vld1.u32 {d31}, [r3] ;load second_pass filter - - vld1.u8 {d22, d23}, [lr]! ;load src data - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - mov r12, #4 ;loop counter - -filt_blk2d_sp16x16_loop_neon - vld1.u8 {d24, d25}, [lr]! - vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp9_filter[0]) - vld1.u8 {d26, d27}, [lr]! - vmull.u8 q2, d23, d0 - vld1.u8 {d28, d29}, [lr]! - vmull.u8 q3, d24, d0 - vld1.u8 {d30, d31}, [lr]! 
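- ; note: each second-pass output row below is (above * f0 + below * f1 + 64) >> 7, - ; where f0 and f1 come from bifilter16_coeff and sum to 128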
- - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * vp9_filter[1]) - vmlal.u8 q2, d25, d1 - vmlal.u8 q3, d26, d1 - vmlal.u8 q4, d27, d1 - vmlal.u8 q5, d28, d1 - vmlal.u8 q6, d29, d1 - vmlal.u8 q7, d30, d1 - vmlal.u8 q8, d31, d1 - - subs r12, r12, #1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 d9, q8, #7 - - vst1.u8 {d2, d3}, [r4], r5 ;store result - vst1.u8 {d4, d5}, [r4], r5 - vst1.u8 {d6, d7}, [r4], r5 - vmov q11, q15 - vst1.u8 {d8, d9}, [r4], r5 - - bne filt_blk2d_sp16x16_loop_neon - - add sp, sp, #272 - - pop {r4-r5,pc} - -;-------------------- -firstpass_bfilter16x16_only - mov r2, #4 ;loop counter - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vdup.8 d1, d31[4] - -;First Pass: output_height lines x output_width columns (16x16) -filt_blk2d_fpo16x16_loop_neon - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - vld1.u8 {d5, d6, d7}, [r0], r1 - vld1.u8 {d8, d9, d10}, [r0], r1 - vld1.u8 {d11, d12, d13}, [r0], r1 - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q8, d3, d0 - vmull.u8 q9, d5, d0 - vmull.u8 q10, d6, d0 - vmull.u8 q11, d8, d0 - vmull.u8 q12, d9, d0 - vmull.u8 q13, d11, d0 - vmull.u8 q14, d12, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - vext.8 d11, d11, d12, #1 - - vmlal.u8 q7, d2, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q9, d5, d1 - vmlal.u8 q11, d8, d1 - vmlal.u8 q13, d11, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - vext.8 d12, d12, d13, #1 - - vmlal.u8 q8, d3, d1 ;(src_ptr[0] * vp9_filter[1]) - vmlal.u8 q10, d6, d1 - vmlal.u8 q12, d9, d1 - vmlal.u8 q14, d12, d1 - - subs r2, r2, #1 - - vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d15, q8, #7 - vqrshrn.u16 d16, q9, #7 - vqrshrn.u16 d17, q10, #7 - vqrshrn.u16 d18, q11, #7 - vqrshrn.u16 d19, q12, #7 - vqrshrn.u16 d20, q13, #7 - vst1.u8 {d14, d15}, [r4], r5 ;store result - vqrshrn.u16 d21, q14, #7 - - vst1.u8 {d16, d17}, [r4], r5 - vst1.u8 {d18, d19}, [r4], r5 - vst1.u8 {d20, d21}, [r4], r5 - - bne filt_blk2d_fpo16x16_loop_neon - pop {r4-r5,pc} - -;--------------------- -secondpass_bfilter16x16_only -;Second pass: 16x16 -;secondpass_filter - add r3, r12, r3, lsl #3 - mov r12, #4 ;loop counter - vld1.u32 {d31}, [r3] ;load second_pass filter - vld1.u8 {d22, d23}, [r0], r1 ;load src data - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - -filt_blk2d_spo16x16_loop_neon - vld1.u8 {d24, d25}, [r0], r1 - vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp9_filter[0]) - vld1.u8 {d26, d27}, [r0], r1 - vmull.u8 q2, d23, d0 - vld1.u8 {d28, d29}, [r0], r1 - vmull.u8 q3, d24, d0 - vld1.u8 {d30, d31}, [r0], r1 - - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * vp9_filter[1]) - vmlal.u8 q2, d25, d1 - vmlal.u8 q3, d26, d1 - vmlal.u8 q4, d27, d1 - vmlal.u8 q5, d28, d1 - vmlal.u8 q6, d29, d1 - vmlal.u8 q7, d30, d1 - vmlal.u8 q8, d31, d1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 
d9, q8, #7 - - vst1.u8 {d2, d3}, [r4], r5 ;store result - subs r12, r12, #1 - vst1.u8 {d4, d5}, [r4], r5 - vmov q11, q15 - vst1.u8 {d6, d7}, [r4], r5 - vst1.u8 {d8, d9}, [r4], r5 - - bne filt_blk2d_spo16x16_loop_neon - pop {r4-r5,pc} - - ENDP - -;----------------- - -bifilter16_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp9/common/arm/neon/vp9_bilinearpredict4x4_neon.asm b/vp9/common/arm/neon/vp9_bilinearpredict4x4_neon.asm deleted file mode 100644 index 01eedf8e9..000000000 --- a/vp9/common/arm/neon/vp9_bilinearpredict4x4_neon.asm +++ /dev/null @@ -1,130 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_bilinear_predict4x4_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; r4 unsigned char *dst_ptr, -; stack(lr) int dst_pitch - -|vp8_bilinear_predict4x4_neon| PROC - push {r4, lr} - - adr r12, bifilter4_coeff - ldr r4, [sp, #8] ;load parameters from stack - ldr lr, [sp, #12] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq skip_firstpass_filter - -;First pass: output_height lines x output_width columns (5x4) - vld1.u8 {d2}, [r0], r1 ;load src data - add r2, r12, r2, lsl #3 ;calculate Hfilter location (2coeffsx4bytes=8bytes) - - vld1.u8 {d3}, [r0], r1 - vld1.u32 {d31}, [r2] ;first_pass filter - - vld1.u8 {d4}, [r0], r1 - vdup.8 d0, d31[0] ;first_pass filter (d0-d1) - vld1.u8 {d5}, [r0], r1 - vdup.8 d1, d31[4] - vld1.u8 {d6}, [r0], r1 - - vshr.u64 q4, q1, #8 ;construct src_ptr[1] - vshr.u64 q5, q2, #8 - vshr.u64 d12, d6, #8 - - vzip.32 d2, d3 ;put 2-line data in 1 register (src_ptr[0]) - vzip.32 d4, d5 - vzip.32 d8, d9 ;put 2-line data in 1 register (src_ptr[1]) - vzip.32 d10, d11 - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q8, d4, d0 - vmull.u8 q9, d6, d0 - - vmlal.u8 q7, d8, d1 ;(src_ptr[1] * vp9_filter[1]) - vmlal.u8 q8, d10, d1 - vmlal.u8 q9, d12, d1 - - vqrshrn.u16 d28, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d29, q8, #7 - vqrshrn.u16 d30, q9, #7 - -;Second pass: 4x4 -secondpass_filter - cmp r3, #0 ;skip second_pass filter if yoffset=0 - beq skip_secondpass_filter - - add r3, r12, r3, lsl #3 ;calculate Vfilter location - vld1.u32 {d31}, [r3] ;load second_pass filter - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0-d5) - vdup.8 d1, d31[4] - - vmull.u8 q1, d28, d0 - vmull.u8 q2, d29, d0 - - vext.8 d26, d28, d29, #4 ;construct src_ptr[pixel_step] - vext.8 d27, d29, d30, #4 - - vmlal.u8 q1, d26, d1 - vmlal.u8 q2, d27, d1 - - add r0, r4, lr - add r1, r0, lr - add r2, r1, lr - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - - vst1.32 {d2[0]}, [r4] ;store result - vst1.32 {d2[1]}, [r0] - vst1.32 {d3[0]}, [r1] - vst1.32 {d3[1]}, [r2] - - pop {r4, pc} - -;-------------------- -skip_firstpass_filter - - vld1.32 {d28[0]}, [r0], r1 ;load src data - vld1.32 {d28[1]}, [r0], r1 - vld1.32 {d29[0]}, [r0], r1 - vld1.32 {d29[1]}, [r0], r1 - vld1.32 {d30[0]}, [r0], r1 - - b secondpass_filter - -;--------------------- -skip_secondpass_filter - 
vst1.32 {d28[0]}, [r4], lr ;store result - vst1.32 {d28[1]}, [r4], lr - vst1.32 {d29[0]}, [r4], lr - vst1.32 {d29[1]}, [r4], lr - - pop {r4, pc} - - ENDP - -;----------------- - -bifilter4_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp9/common/arm/neon/vp9_bilinearpredict8x4_neon.asm b/vp9/common/arm/neon/vp9_bilinearpredict8x4_neon.asm deleted file mode 100644 index 8f49345ff..000000000 --- a/vp9/common/arm/neon/vp9_bilinearpredict8x4_neon.asm +++ /dev/null @@ -1,135 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_bilinear_predict8x4_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; r4 unsigned char *dst_ptr, -; stack(lr) int dst_pitch - -|vp8_bilinear_predict8x4_neon| PROC - push {r4, lr} - - adr r12, bifilter8x4_coeff - ldr r4, [sp, #8] ;load parameters from stack - ldr lr, [sp, #12] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq skip_firstpass_filter - -;First pass: output_height lines x output_width columns (5x8) - add r2, r12, r2, lsl #3 ;calculate filter location - - vld1.u8 {q1}, [r0], r1 ;load src data - vld1.u32 {d31}, [r2] ;load first_pass filter - vld1.u8 {q2}, [r0], r1 - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vld1.u8 {q3}, [r0], r1 - vdup.8 d1, d31[4] - vld1.u8 {q4}, [r0], r1 - - vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vld1.u8 {q5}, [r0], r1 - vmull.u8 q7, d4, d0 - vmull.u8 q8, d6, d0 - vmull.u8 q9, d8, d0 - vmull.u8 q10, d10, d0 - - vext.8 d3, d2, d3, #1 ;construct src_ptr[-1] - vext.8 d5, d4, d5, #1 - vext.8 d7, d6, d7, #1 - vext.8 d9, d8, d9, #1 - vext.8 d11, d10, d11, #1 - - vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp9_filter[1]) - vmlal.u8 q7, d5, d1 - vmlal.u8 q8, d7, d1 - vmlal.u8 q9, d9, d1 - vmlal.u8 q10, d11, d1 - - vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d23, q7, #7 - vqrshrn.u16 d24, q8, #7 - vqrshrn.u16 d25, q9, #7 - vqrshrn.u16 d26, q10, #7 - -;Second pass: 4x8 -secondpass_filter - cmp r3, #0 ;skip second_pass filter if yoffset=0 - beq skip_secondpass_filter - - add r3, r12, r3, lsl #3 - add r0, r4, lr - - vld1.u32 {d31}, [r3] ;load second_pass filter - add r1, r0, lr - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - - vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q2, d23, d0 - vmull.u8 q3, d24, d0 - vmull.u8 q4, d25, d0 - - vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * vp9_filter[1]) - vmlal.u8 q2, d24, d1 - vmlal.u8 q3, d25, d1 - vmlal.u8 q4, d26, d1 - - add r2, r1, lr - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - - vst1.u8 {d2}, [r4] ;store result - vst1.u8 {d3}, [r0] - vst1.u8 {d4}, [r1] - vst1.u8 {d5}, [r2] - - pop {r4, pc} - -;-------------------- -skip_firstpass_filter - vld1.u8 {d22}, [r0], r1 ;load src data - vld1.u8 {d23}, [r0], r1 - vld1.u8 {d24}, [r0], r1 - vld1.u8 {d25}, [r0], r1 - vld1.u8 {d26}, [r0], r1 - - b secondpass_filter - -;--------------------- -skip_secondpass_filter - 
vst1.u8 {d22}, [r4], lr ;store result - vst1.u8 {d23}, [r4], lr - vst1.u8 {d24}, [r4], lr - vst1.u8 {d25}, [r4], lr - - pop {r4, pc} - - ENDP - -;----------------- - -bifilter8x4_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp9/common/arm/neon/vp9_bilinearpredict8x8_neon.asm b/vp9/common/arm/neon/vp9_bilinearpredict8x8_neon.asm deleted file mode 100644 index 6967f1950..000000000 --- a/vp9/common/arm/neon/vp9_bilinearpredict8x8_neon.asm +++ /dev/null @@ -1,183 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_bilinear_predict8x8_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; r4 unsigned char *dst_ptr, -; stack(lr) int dst_pitch - -|vp8_bilinear_predict8x8_neon| PROC - push {r4, lr} - - adr r12, bifilter8_coeff - ldr r4, [sp, #8] ;load parameters from stack - ldr lr, [sp, #12] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq skip_firstpass_filter - -;First pass: output_height lines x output_width columns (9x8) - add r2, r12, r2, lsl #3 ;calculate filter location - - vld1.u8 {q1}, [r0], r1 ;load src data - vld1.u32 {d31}, [r2] ;load first_pass filter - vld1.u8 {q2}, [r0], r1 - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vld1.u8 {q3}, [r0], r1 - vdup.8 d1, d31[4] - vld1.u8 {q4}, [r0], r1 - - vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q7, d4, d0 - vmull.u8 q8, d6, d0 - vmull.u8 q9, d8, d0 - - vext.8 d3, d2, d3, #1 ;construct src_ptr[-1] - vext.8 d5, d4, d5, #1 - vext.8 d7, d6, d7, #1 - vext.8 d9, d8, d9, #1 - - vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp9_filter[1]) - vmlal.u8 q7, d5, d1 - vmlal.u8 q8, d7, d1 - vmlal.u8 q9, d9, d1 - - vld1.u8 {q1}, [r0], r1 ;load src data - vqrshrn.u16 d22, q6, #7 ;shift/round/saturate to u8 - vld1.u8 {q2}, [r0], r1 - vqrshrn.u16 d23, q7, #7 - vld1.u8 {q3}, [r0], r1 - vqrshrn.u16 d24, q8, #7 - vld1.u8 {q4}, [r0], r1 - vqrshrn.u16 d25, q9, #7 - - ;first_pass filtering on the rest 5-line data - vld1.u8 {q5}, [r0], r1 - - vmull.u8 q6, d2, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q7, d4, d0 - vmull.u8 q8, d6, d0 - vmull.u8 q9, d8, d0 - vmull.u8 q10, d10, d0 - - vext.8 d3, d2, d3, #1 ;construct src_ptr[-1] - vext.8 d5, d4, d5, #1 - vext.8 d7, d6, d7, #1 - vext.8 d9, d8, d9, #1 - vext.8 d11, d10, d11, #1 - - vmlal.u8 q6, d3, d1 ;(src_ptr[1] * vp9_filter[1]) - vmlal.u8 q7, d5, d1 - vmlal.u8 q8, d7, d1 - vmlal.u8 q9, d9, d1 - vmlal.u8 q10, d11, d1 - - vqrshrn.u16 d26, q6, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d27, q7, #7 - vqrshrn.u16 d28, q8, #7 - vqrshrn.u16 d29, q9, #7 - vqrshrn.u16 d30, q10, #7 - -;Second pass: 8x8 -secondpass_filter - cmp r3, #0 ;skip second_pass filter if yoffset=0 - beq skip_secondpass_filter - - add r3, r12, r3, lsl #3 - add r0, r4, lr - - vld1.u32 {d31}, [r3] ;load second_pass filter - add r1, r0, lr - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - - vmull.u8 q1, d22, d0 ;(src_ptr[0] * vp9_filter[0]) - vmull.u8 q2, d23, d0 - vmull.u8 q3, d24, d0 - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - 
vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d23, d1 ;(src_ptr[pixel_step] * vp9_filter[1]) - vmlal.u8 q2, d24, d1 - vmlal.u8 q3, d25, d1 - vmlal.u8 q4, d26, d1 - vmlal.u8 q5, d27, d1 - vmlal.u8 q6, d28, d1 - vmlal.u8 q7, d29, d1 - vmlal.u8 q8, d30, d1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 d9, q8, #7 - - vst1.u8 {d2}, [r4] ;store result - vst1.u8 {d3}, [r0] - vst1.u8 {d4}, [r1], lr - vst1.u8 {d5}, [r1], lr - vst1.u8 {d6}, [r1], lr - vst1.u8 {d7}, [r1], lr - vst1.u8 {d8}, [r1], lr - vst1.u8 {d9}, [r1], lr - - pop {r4, pc} - -;-------------------- -skip_firstpass_filter - vld1.u8 {d22}, [r0], r1 ;load src data - vld1.u8 {d23}, [r0], r1 - vld1.u8 {d24}, [r0], r1 - vld1.u8 {d25}, [r0], r1 - vld1.u8 {d26}, [r0], r1 - vld1.u8 {d27}, [r0], r1 - vld1.u8 {d28}, [r0], r1 - vld1.u8 {d29}, [r0], r1 - vld1.u8 {d30}, [r0], r1 - - b secondpass_filter - -;--------------------- -skip_secondpass_filter - vst1.u8 {d22}, [r4], lr ;store result - vst1.u8 {d23}, [r4], lr - vst1.u8 {d24}, [r4], lr - vst1.u8 {d25}, [r4], lr - vst1.u8 {d26}, [r4], lr - vst1.u8 {d27}, [r4], lr - vst1.u8 {d28}, [r4], lr - vst1.u8 {d29}, [r4], lr - - pop {r4, pc} - - ENDP - -;----------------- - -bifilter8_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp9/common/arm/neon/vp9_buildintrapredictorsmby_neon.asm b/vp9/common/arm/neon/vp9_buildintrapredictorsmby_neon.asm deleted file mode 100644 index e3ea91fe6..000000000 --- a/vp9/common/arm/neon/vp9_buildintrapredictorsmby_neon.asm +++ /dev/null @@ -1,584 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
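The file deleted next builds the four 16x16 luma intra predictors VP8 defines (DC, V, H and TM). V and H are plain replication of the above row or left column, DC is a rounded average of whichever neighbors exist, and TM ("TrueMotion") forms left + above - top_left per pixel with saturation, which is what the vqadd.s16/vqshrun.s16 pairs in case_tm_pred compute. A scalar sketch of the TM case, with illustrative names:

#include <stdint.h>

static uint8_t clamp255(int v) { return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v; }

/* TM predictor: each output pixel is left[r] + above[c] - top_left, saturated. */
static void tm_predict_16x16(uint8_t *pred, int stride, const uint8_t *above,
                             const uint8_t *left, uint8_t top_left) {
    int r, c;
    for (r = 0; r < 16; ++r)
        for (c = 0; c < 16; ++c)
            pred[r * stride + c] = clamp255(left[r] + above[c] - top_left);
}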
-; - - - EXPORT |vp8_build_intra_predictors_mby_neon_func| - EXPORT |vp8_build_intra_predictors_mby_s_neon_func| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *y_buffer -; r1 unsigned char *ypred_ptr -; r2 int y_stride -; r3 int mode -; stack int Up -; stack int Left - -|vp8_build_intra_predictors_mby_neon_func| PROC - push {r4-r8, lr} - - cmp r3, #0 - beq case_dc_pred - cmp r3, #1 - beq case_v_pred - cmp r3, #2 - beq case_h_pred - cmp r3, #3 - beq case_tm_pred - -case_dc_pred - ldr r4, [sp, #24] ; Up - ldr r5, [sp, #28] ; Left - - ; Default the DC average to 128 - mov r12, #128 - vdup.u8 q0, r12 - - ; Zero out running sum - mov r12, #0 - - ; compute shift and jump - adds r7, r4, r5 - beq skip_dc_pred_up_left - - ; Load above row, if it exists - cmp r4, #0 - beq skip_dc_pred_up - - sub r6, r0, r2 - vld1.8 {q1}, [r6] - vpaddl.u8 q2, q1 - vpaddl.u16 q3, q2 - vpaddl.u32 q4, q3 - - vmov.32 r4, d8[0] - vmov.32 r6, d9[0] - - add r12, r4, r6 - - ; Move back to integer registers - -skip_dc_pred_up - - cmp r5, #0 - beq skip_dc_pred_left - - sub r0, r0, #1 - - ; Load left column, if it exists - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0] - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - -skip_dc_pred_left - add r7, r7, #3 ; Shift - sub r4, r7, #1 - mov r5, #1 - add r12, r12, r5, lsl r4 - mov r5, r12, lsr r7 ; expected_dc - - vdup.u8 q0, r5 - -skip_dc_pred_up_left - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - - pop {r4-r8,pc} -case_v_pred - ; Copy down above row - sub r6, r0, r2 - vld1.8 {q0}, [r6] - - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - vst1.u8 {q0}, [r1]! - pop {r4-r8,pc} - -case_h_pred - ; Load 4x yleft_col - sub r0, r0, #1 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1]! - vst1.u8 {q1}, [r1]! - vst1.u8 {q2}, [r1]! - vst1.u8 {q3}, [r1]! - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1]! - vst1.u8 {q1}, [r1]! - vst1.u8 {q2}, [r1]! - vst1.u8 {q3}, [r1]! - - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1]! - vst1.u8 {q1}, [r1]! - vst1.u8 {q2}, [r1]! - vst1.u8 {q3}, [r1]!
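- ; rows 12-15: replicate each of the last four left-column pixels across its row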
- - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1]! - vst1.u8 {q1}, [r1]! - vst1.u8 {q2}, [r1]! - vst1.u8 {q3}, [r1]! - - pop {r4-r8,pc} - -case_tm_pred - ; Load yabove_row - sub r3, r0, r2 - vld1.8 {q8}, [r3] - - ; Load ytop_left - sub r3, r3, #1 - ldrb r7, [r3] - - vdup.u16 q7, r7 - - ; Compute yabove_row - ytop_left - mov r3, #1 - vdup.u8 q0, r3 - - vmull.u8 q4, d16, d0 - vmull.u8 q5, d17, d0 - - vsub.s16 q4, q4, q7 - vsub.s16 q5, q5, q7 - - ; Load 4x yleft_col - sub r0, r0, #1 - mov r12, #4 - -case_tm_pred_loop - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u16 q0, r3 - vdup.u16 q1, r4 - vdup.u16 q2, r5 - vdup.u16 q3, r6 - - vqadd.s16 q8, q0, q4 - vqadd.s16 q9, q0, q5 - - vqadd.s16 q10, q1, q4 - vqadd.s16 q11, q1, q5 - - vqadd.s16 q12, q2, q4 - vqadd.s16 q13, q2, q5 - - vqadd.s16 q14, q3, q4 - vqadd.s16 q15, q3, q5 - - vqshrun.s16 d0, q8, #0 - vqshrun.s16 d1, q9, #0 - - vqshrun.s16 d2, q10, #0 - vqshrun.s16 d3, q11, #0 - - vqshrun.s16 d4, q12, #0 - vqshrun.s16 d5, q13, #0 - - vqshrun.s16 d6, q14, #0 - vqshrun.s16 d7, q15, #0 - - vst1.u8 {q0}, [r1]! - vst1.u8 {q1}, [r1]! - vst1.u8 {q2}, [r1]! - vst1.u8 {q3}, [r1]! - - subs r12, r12, #1 - bne case_tm_pred_loop - - pop {r4-r8,pc} - - ENDP - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; r0 unsigned char *y_buffer -; r1 unsigned char *ypred_ptr -; r2 int y_stride -; r3 int mode -; stack int Up -; stack int Left - -|vp8_build_intra_predictors_mby_s_neon_func| PROC - push {r4-r8, lr} - - mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor; - - cmp r3, #0 - beq case_dc_pred_s - cmp r3, #1 - beq case_v_pred_s - cmp r3, #2 - beq case_h_pred_s - cmp r3, #3 - beq case_tm_pred_s - -case_dc_pred_s - ldr r4, [sp, #24] ; Up - ldr r5, [sp, #28] ; Left - - ; Default the DC average to 128 - mov r12, #128 - vdup.u8 q0, r12 - - ; Zero out running sum - mov r12, #0 - - ; compute shift and jump - adds r7, r4, r5 - beq skip_dc_pred_up_left_s - - ; Load above row, if it exists - cmp r4, #0 - beq skip_dc_pred_up_s - - sub r6, r0, r2 - vld1.8 {q1}, [r6] - vpaddl.u8 q2, q1 - vpaddl.u16 q3, q2 - vpaddl.u32 q4, q3 - - vmov.32 r4, d8[0] - vmov.32 r6, d9[0] - - add r12, r4, r6 - - ; Move back to integer registers - -skip_dc_pred_up_s - - cmp r5, #0 - beq skip_dc_pred_left_s - - sub r0, r0, #1 - - ; Load left column, if it exists - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0] - - add r12, r12, r3 - add r12, r12, r4 - add r12, r12, r5 - add r12, r12, r6 - -skip_dc_pred_left_s - add r7, r7, #3 ; Shift - sub r4, r7, #1 - mov r5, #1 - add r12, r12, r5, lsl r4 - mov r5, r12, lsr r7 ; expected_dc - - vdup.u8 q0, r5 - -skip_dc_pred_up_left_s - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2
- vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - - pop {r4-r8,pc} -case_v_pred_s - ; Copy down above row - sub r6, r0, r2 - vld1.8 {q0}, [r6] - - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q0}, [r1], r2 - pop {r4-r8,pc} - -case_h_pred_s - ; Load 4x yleft_col - sub r0, r0, #1 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q1}, [r1], r2 - vst1.u8 {q2}, [r1], r2 - vst1.u8 {q3}, [r1], r2 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q1}, [r1], r2 - vst1.u8 {q2}, [r1], r2 - vst1.u8 {q3}, [r1], r2 - - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q1}, [r1], r2 - vst1.u8 {q2}, [r1], r2 - vst1.u8 {q3}, [r1], r2 - - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u8 q0, r3 - vdup.u8 q1, r4 - vdup.u8 q2, r5 - vdup.u8 q3, r6 - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q1}, [r1], r2 - vst1.u8 {q2}, [r1], r2 - vst1.u8 {q3}, [r1], r2 - - pop {r4-r8,pc} - -case_tm_pred_s - ; Load yabove_row - sub r3, r0, r2 - vld1.8 {q8}, [r3] - - ; Load ytop_left - sub r3, r3, #1 - ldrb r7, [r3] - - vdup.u16 q7, r7 - - ; Compute yabove_row - ytop_left - mov r3, #1 - vdup.u8 q0, r3 - - vmull.u8 q4, d16, d0 - vmull.u8 q5, d17, d0 - - vsub.s16 q4, q4, q7 - vsub.s16 q5, q5, q7 - - ; Load 4x yleft_col - sub r0, r0, #1 - mov r12, #4 - -case_tm_pred_loop_s - ldrb r3, [r0], r2 - ldrb r4, [r0], r2 - ldrb r5, [r0], r2 - ldrb r6, [r0], r2 - vdup.u16 q0, r3 - vdup.u16 q1, r4 - vdup.u16 q2, r5 - vdup.u16 q3, r6 - - vqadd.s16 q8, q0, q4 - vqadd.s16 q9, q0, q5 - - vqadd.s16 q10, q1, q4 - vqadd.s16 q11, q1, q5 - - vqadd.s16 q12, q2, q4 - vqadd.s16 q13, q2, q5 - - vqadd.s16 q14, q3, q4 - vqadd.s16 q15, q3, q5 - - vqshrun.s16 d0, q8, #0 - vqshrun.s16 d1, q9, #0 - - vqshrun.s16 d2, q10, #0 - vqshrun.s16 d3, q11, #0 - - vqshrun.s16 d4, q12, #0 - vqshrun.s16 d5, q13, #0 - - vqshrun.s16 d6, q14, #0 - vqshrun.s16 d7, q15, #0 - - vst1.u8 {q0}, [r1], r2 - vst1.u8 {q1}, [r1], r2 - vst1.u8 {q2}, [r1], r2 - vst1.u8 {q3}, [r1], r2 - - subs r12, r12, #1 - bne case_tm_pred_loop_s - - pop {r4-r8,pc} - - ENDP - - - END diff --git a/vp9/common/arm/neon/vp9_copymem16x16_neon.asm b/vp9/common/arm/neon/vp9_copymem16x16_neon.asm deleted file mode 100644 index bff8156d9..000000000 --- a/vp9/common/arm/neon/vp9_copymem16x16_neon.asm +++ /dev/null @@ -1,59 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
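This and the two copy_mem files after it are straight block copies; the NEON bodies only interleave the loads and stores to hide memory latency. In C terms the whole family reduces to the following (illustrative names, not a libvpx function):

#include <string.h>

static void copy_mem(const unsigned char *src, int src_stride,
                     unsigned char *dst, int dst_stride, int w, int h) {
    int r;
    for (r = 0; r < h; ++r) {    /* one row per iteration: 16x16, 8x8 or 8x4 */
        memcpy(dst, src, (size_t)w);
        src += src_stride;
        dst += dst_stride;
    }
}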
-; - - - EXPORT |vp9_copy_mem16x16_neon| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp9_copy_mem16x16_neon| PROC - - vld1.u8 {q0}, [r0], r1 - vld1.u8 {q1}, [r0], r1 - vld1.u8 {q2}, [r0], r1 - vst1.u8 {q0}, [r2], r3 - vld1.u8 {q3}, [r0], r1 - vst1.u8 {q1}, [r2], r3 - vld1.u8 {q4}, [r0], r1 - vst1.u8 {q2}, [r2], r3 - vld1.u8 {q5}, [r0], r1 - vst1.u8 {q3}, [r2], r3 - vld1.u8 {q6}, [r0], r1 - vst1.u8 {q4}, [r2], r3 - vld1.u8 {q7}, [r0], r1 - vst1.u8 {q5}, [r2], r3 - vld1.u8 {q8}, [r0], r1 - vst1.u8 {q6}, [r2], r3 - vld1.u8 {q9}, [r0], r1 - vst1.u8 {q7}, [r2], r3 - vld1.u8 {q10}, [r0], r1 - vst1.u8 {q8}, [r2], r3 - vld1.u8 {q11}, [r0], r1 - vst1.u8 {q9}, [r2], r3 - vld1.u8 {q12}, [r0], r1 - vst1.u8 {q10}, [r2], r3 - vld1.u8 {q13}, [r0], r1 - vst1.u8 {q11}, [r2], r3 - vld1.u8 {q14}, [r0], r1 - vst1.u8 {q12}, [r2], r3 - vld1.u8 {q15}, [r0], r1 - vst1.u8 {q13}, [r2], r3 - vst1.u8 {q14}, [r2], r3 - vst1.u8 {q15}, [r2], r3 - - mov pc, lr - - ENDP ; |vp9_copy_mem16x16_neon| - - END diff --git a/vp9/common/arm/neon/vp9_copymem8x4_neon.asm b/vp9/common/arm/neon/vp9_copymem8x4_neon.asm deleted file mode 100644 index ffd2df8e1..000000000 --- a/vp9/common/arm/neon/vp9_copymem8x4_neon.asm +++ /dev/null @@ -1,34 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_copy_mem8x4_neon| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp9_copy_mem8x4_neon| PROC - vld1.u8 {d0}, [r0], r1 - vld1.u8 {d1}, [r0], r1 - vst1.u8 {d0}, [r2], r3 - vld1.u8 {d2}, [r0], r1 - vst1.u8 {d1}, [r2], r3 - vld1.u8 {d3}, [r0], r1 - vst1.u8 {d2}, [r2], r3 - vst1.u8 {d3}, [r2], r3 - - mov pc, lr - - ENDP ; |vp9_copy_mem8x4_neon| - - END diff --git a/vp9/common/arm/neon/vp9_copymem8x8_neon.asm b/vp9/common/arm/neon/vp9_copymem8x8_neon.asm deleted file mode 100644 index 2d394c043..000000000 --- a/vp9/common/arm/neon/vp9_copymem8x8_neon.asm +++ /dev/null @@ -1,43 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp9_copy_mem8x8_neon| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA Block, CODE, READONLY ; name this block of code -;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) -;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -|vp9_copy_mem8x8_neon| PROC - - vld1.u8 {d0}, [r0], r1 - vld1.u8 {d1}, [r0], r1 - vst1.u8 {d0}, [r2], r3 - vld1.u8 {d2}, [r0], r1 - vst1.u8 {d1}, [r2], r3 - vld1.u8 {d3}, [r0], r1 - vst1.u8 {d2}, [r2], r3 - vld1.u8 {d4}, [r0], r1 - vst1.u8 {d3}, [r2], r3 - vld1.u8 {d5}, [r0], r1 - vst1.u8 {d4}, [r2], r3 - vld1.u8 {d6}, [r0], r1 - vst1.u8 {d5}, [r2], r3 - vld1.u8 {d7}, [r0], r1 - vst1.u8 {d6}, [r2], r3 - vst1.u8 {d7}, [r2], r3 - - mov pc, lr - - ENDP ; |vp9_copy_mem8x8_neon| - - END diff --git a/vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm b/vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm deleted file mode 100644 index 49ba05fb0..000000000 --- a/vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm +++ /dev/null @@ -1,49 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_dc_only_idct_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr, -; unsigned char *dst_ptr, int pitch, int stride) -; r0 input_dc -; r1 pred_ptr -; r2 dst_ptr -; r3 pitch -; sp stride -|vp8_dc_only_idct_add_neon| PROC - add r0, r0, #4 - asr r0, r0, #3 - ldr r12, [sp] - vdup.16 q0, r0 - - vld1.32 {d2[0]}, [r1], r3 - vld1.32 {d2[1]}, [r1], r3 - vld1.32 {d4[0]}, [r1], r3 - vld1.32 {d4[1]}, [r1] - - vaddw.u8 q1, q0, d2 - vaddw.u8 q2, q0, d4 - - vqmovun.s16 d2, q1 - vqmovun.s16 d4, q2 - - vst1.32 {d2[0]}, [r2], r12 - vst1.32 {d2[1]}, [r2], r12 - vst1.32 {d4[0]}, [r2], r12 - vst1.32 {d4[1]}, [r2] - - bx lr - - ENDP - END diff --git a/vp9/common/arm/neon/vp9_iwalsh_neon.asm b/vp9/common/arm/neon/vp9_iwalsh_neon.asm deleted file mode 100644 index 01c79d937..000000000 --- a/vp9/common/arm/neon/vp9_iwalsh_neon.asm +++ /dev/null @@ -1,80 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
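The file deleted next is the inverse 4x4 Walsh-Hadamard transform applied to the second-order DC block: a column butterfly pass, a transpose (done implicitly by the vtrn/vst4 pattern in the NEON code), a row butterfly pass, and a final (x + 3) >> 3 rounding. A scalar sketch matching the in-file comments (illustrative names):

static void inv_walsh4x4(const short *input, short *output) {
    short tmp[16];
    int i;
    for (i = 0; i < 4; ++i) {              /* first pass: down the columns */
        short a = input[i]     + input[12 + i];
        short b = input[4 + i] + input[8 + i];
        short d = input[i]     - input[12 + i];
        short c = input[4 + i] - input[8 + i];
        tmp[i]      = a + b;  tmp[4 + i]  = d + c;
        tmp[8 + i]  = a - b;  tmp[12 + i] = d - c;
    }
    for (i = 0; i < 4; ++i) {              /* second pass: along the rows */
        short a = tmp[4 * i]     + tmp[4 * i + 3];
        short b = tmp[4 * i + 1] + tmp[4 * i + 2];
        short d = tmp[4 * i]     - tmp[4 * i + 3];
        short c = tmp[4 * i + 1] - tmp[4 * i + 2];
        output[4 * i]     = (short)((a + b + 3) >> 3);
        output[4 * i + 1] = (short)((d + c + 3) >> 3);
        output[4 * i + 2] = (short)((a - b + 3) >> 3);
        output[4 * i + 3] = (short)((d - c + 3) >> 3);
    }
}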
-; - EXPORT |vp8_short_inv_walsh4x4_neon| - EXPORT |vp8_short_inv_walsh4x4_1_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code - -;short vp8_short_inv_walsh4x4_neon(short *input, short *output) -|vp8_short_inv_walsh4x4_neon| PROC - - ; read in all four lines of values: d0->d3 - vld1.i16 {q0-q1}, [r0@128] - - ; first for loop - vadd.s16 d4, d0, d3 ;a = [0] + [12] - vadd.s16 d6, d1, d2 ;b = [4] + [8] - vsub.s16 d5, d0, d3 ;d = [0] - [12] - vsub.s16 d7, d1, d2 ;c = [4] - [8] - - vadd.s16 q0, q2, q3 ; a+b d+c - vsub.s16 q1, q2, q3 ; a-b d-c - - vtrn.32 d0, d2 ;d0: 0 1 8 9 - ;d2: 2 3 10 11 - vtrn.32 d1, d3 ;d1: 4 5 12 13 - ;d3: 6 7 14 15 - - vtrn.16 d0, d1 ;d0: 0 4 8 12 - ;d1: 1 5 9 13 - vtrn.16 d2, d3 ;d2: 2 6 10 14 - ;d3: 3 7 11 15 - - ; second for loop - - vadd.s16 d4, d0, d3 ;a = [0] + [3] - vadd.s16 d6, d1, d2 ;b = [1] + [2] - vsub.s16 d5, d0, d3 ;d = [0] - [3] - vsub.s16 d7, d1, d2 ;c = [1] - [2] - - vmov.i16 q8, #3 - - vadd.s16 q0, q2, q3 ; a+b d+c - vsub.s16 q1, q2, q3 ; a-b d-c - - vadd.i16 q0, q0, q8 ;e/f += 3 - vadd.i16 q1, q1, q8 ;g/h += 3 - - vshr.s16 q0, q0, #3 ;e/f >> 3 - vshr.s16 q1, q1, #3 ;g/h >> 3 - - vst4.i16 {d0,d1,d2,d3}, [r1@128] - - bx lr - ENDP ; |vp8_short_inv_walsh4x4_neon| - - -;short vp8_short_inv_walsh4x4_1_neon(short *input, short *output) -|vp8_short_inv_walsh4x4_1_neon| PROC - ldrsh r2, [r0] ; load input[0] - add r3, r2, #3 ; add 3 - add r2, r1, #16 ; base for last 8 output - asr r0, r3, #3 ; right shift 3 - vdup.16 q0, r0 ; load and duplicate - vst1.16 {q0}, [r1@128] ; write back 8 - vst1.16 {q0}, [r2@128] ; write back last 8 - bx lr - ENDP ; |vp8_short_inv_walsh4x4_1_neon| - - END diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_neon.asm deleted file mode 100644 index bc6616734..000000000 --- a/vp9/common/arm/neon/vp9_loopfilter_neon.asm +++ /dev/null @@ -1,397 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
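The loop-filter file deleted next routes the four edge functions through one shared helper, vp9_loop_filter_neon, which computes the filter mask (all neighbor deltas within limit, and 2*|p0-q0| + |p1-q1|/2 within blimit) plus the high-edge-variance flag, then applies the standard 4-tap update. With pixels XORed by 0x80 into signed range, the per-pixel arithmetic spelled out in the helper's comments is roughly this, where mask and hev are 0 or -1 (a sketch with illustrative names):

#include <stdint.h>

static int8_t sclamp(int v) { return (int8_t)(v < -128 ? -128 : v > 127 ? 127 : v); }

static void filter4(int mask, int hev,
                    int8_t *ps1, int8_t *ps0, int8_t *qs0, int8_t *qs1) {
    int f  = sclamp(*ps1 - *qs1) & hev;        /* outer-tap term only if hev */
    int f1, f2;
    f  = sclamp(f + 3 * (*qs0 - *ps0)) & mask;
    f1 = sclamp(f + 4) >> 3;                   /* Filter1 */
    f2 = sclamp(f + 3) >> 3;                   /* Filter2 */
    *qs0 = sclamp(*qs0 - f1);
    *ps0 = sclamp(*ps0 + f2);
    f = ((f1 + 1) >> 1) & ~hev;                /* outer tap adjustment */
    *qs1 = sclamp(*qs1 - f);
    *ps1 = sclamp(*ps1 + f);
}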
-; - - - EXPORT |vp9_loop_filter_horizontal_edge_y_neon| - EXPORT |vp9_loop_filter_horizontal_edge_uv_neon| - EXPORT |vp9_loop_filter_vertical_edge_y_neon| - EXPORT |vp9_loop_filter_vertical_edge_uv_neon| - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src -; r1 int pitch -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, -|vp9_loop_filter_horizontal_edge_y_neon| PROC - push {lr} - vdup.u8 q0, r2 ; duplicate blimit - vdup.u8 q1, r3 ; duplicate limit - sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines - ldr r3, [sp, #4] ; load thresh - add r12, r2, r1 - add r1, r1, r1 - - vdup.u8 q2, r3 ; duplicate thresh - - vld1.u8 {q3}, [r2@128], r1 ; p3 - vld1.u8 {q4}, [r12@128], r1 ; p2 - vld1.u8 {q5}, [r2@128], r1 ; p1 - vld1.u8 {q6}, [r12@128], r1 ; p0 - vld1.u8 {q7}, [r2@128], r1 ; q0 - vld1.u8 {q8}, [r12@128], r1 ; q1 - vld1.u8 {q9}, [r2@128] ; q2 - vld1.u8 {q10}, [r12@128] ; q3 - - sub r2, r2, r1, lsl #1 - sub r12, r12, r1, lsl #1 - - bl vp9_loop_filter_neon - - vst1.u8 {q5}, [r2@128], r1 ; store op1 - vst1.u8 {q6}, [r12@128], r1 ; store op0 - vst1.u8 {q7}, [r2@128], r1 ; store oq0 - vst1.u8 {q8}, [r12@128], r1 ; store oq1 - - pop {pc} - ENDP ; |vp9_loop_filter_horizontal_edge_y_neon| - - -; r0 unsigned char *u, -; r1 int pitch, -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, -; sp+4 unsigned char *v -|vp9_loop_filter_horizontal_edge_uv_neon| PROC - push {lr} - vdup.u8 q0, r2 ; duplicate blimit - vdup.u8 q1, r3 ; duplicate limit - ldr r12, [sp, #4] ; load thresh - ldr r2, [sp, #8] ; load v ptr - vdup.u8 q2, r12 ; duplicate thresh - - sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines - sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines - - vld1.u8 {d6}, [r3@64], r1 ; p3 - vld1.u8 {d7}, [r12@64], r1 ; p3 - vld1.u8 {d8}, [r3@64], r1 ; p2 - vld1.u8 {d9}, [r12@64], r1 ; p2 - vld1.u8 {d10}, [r3@64], r1 ; p1 - vld1.u8 {d11}, [r12@64], r1 ; p1 - vld1.u8 {d12}, [r3@64], r1 ; p0 - vld1.u8 {d13}, [r12@64], r1 ; p0 - vld1.u8 {d14}, [r3@64], r1 ; q0 - vld1.u8 {d15}, [r12@64], r1 ; q0 - vld1.u8 {d16}, [r3@64], r1 ; q1 - vld1.u8 {d17}, [r12@64], r1 ; q1 - vld1.u8 {d18}, [r3@64], r1 ; q2 - vld1.u8 {d19}, [r12@64], r1 ; q2 - vld1.u8 {d20}, [r3@64] ; q3 - vld1.u8 {d21}, [r12@64] ; q3 - - bl vp9_loop_filter_neon - - sub r0, r0, r1, lsl #1 - sub r2, r2, r1, lsl #1 - - vst1.u8 {d10}, [r0@64], r1 ; store u op1 - vst1.u8 {d11}, [r2@64], r1 ; store v op1 - vst1.u8 {d12}, [r0@64], r1 ; store u op0 - vst1.u8 {d13}, [r2@64], r1 ; store v op0 - vst1.u8 {d14}, [r0@64], r1 ; store u oq0 - vst1.u8 {d15}, [r2@64], r1 ; store v oq0 - vst1.u8 {d16}, [r0@64] ; store u oq1 - vst1.u8 {d17}, [r2@64] ; store v oq1 - - pop {pc} - ENDP ; |vp9_loop_filter_horizontal_edge_uv_neon| - -; void vp9_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch, -; const signed char *flimit, -; const signed char *limit, -; const signed char *thresh, -; int count) -; r0 unsigned char *src -; r1 int pitch -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, - -|vp9_loop_filter_vertical_edge_y_neon| PROC - push {lr} - vdup.u8 q0, r2 ; duplicate blimit - vdup.u8 q1, r3 ; duplicate limit - sub r2, r0, #4 ; src ptr down by 4 columns - add r1, r1, r1 - ldr r3, [sp, #4] ; load thresh - add r12, r2, r1, asr #1 - - vld1.u8 {d6}, [r2], r1 - vld1.u8 {d8}, [r12], r1 - vld1.u8 {d10}, [r2], r1 - vld1.u8 {d12}, [r12], r1 - vld1.u8 {d14}, [r2], r1 - vld1.u8 {d16}, [r12], r1 - vld1.u8 {d18}, [r2], r1 - vld1.u8 {d20}, [r12], r1 - 
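- ; first 8 of 16 rows loaded; the second batch below completes a 16x8 tile that is transposed so the vertical edge can be filtered as rows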
- vld1.u8 {d7}, [r2], r1 ; load second 8-line src data - vld1.u8 {d9}, [r12], r1 - vld1.u8 {d11}, [r2], r1 - vld1.u8 {d13}, [r12], r1 - vld1.u8 {d15}, [r2], r1 - vld1.u8 {d17}, [r12], r1 - vld1.u8 {d19}, [r2] - vld1.u8 {d21}, [r12] - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vdup.u8 q2, r3 ; duplicate thresh - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - bl vp9_loop_filter_neon - - vswp d12, d11 - vswp d16, d13 - - sub r0, r0, #2 ; dst ptr - - vswp d14, d12 - vswp d16, d15 - - add r12, r0, r1, asr #1 - - ;store op1, op0, oq0, oq1 - vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1 - vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r12], r1 - vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1 - vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1 - vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1 - vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r12], r1 - vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1 - vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r12], r1 - - vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1 - vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r12], r1 - vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1 - vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1 - vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1 - vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r12], r1 - vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0] - vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12] - - pop {pc} - ENDP ; |vp9_loop_filter_vertical_edge_y_neon| - -; void vp9_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch -; const signed char *flimit, -; const signed char *limit, -; const signed char *thresh, -; unsigned char *v) -; r0 unsigned char *u, -; r1 int pitch, -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, -; sp+4 unsigned char *v -|vp9_loop_filter_vertical_edge_uv_neon| PROC - push {lr} - vdup.u8 q0, r2 ; duplicate blimit - sub r12, r0, #4 ; move u pointer down by 4 columns - ldr r2, [sp, #8] ; load v ptr - vdup.u8 q1, r3 ; duplicate limit - sub r3, r2, #4 ; move v pointer down by 4 columns - - vld1.u8 {d6}, [r12], r1 ;load u data - vld1.u8 {d7}, [r3], r1 ;load v data - vld1.u8 {d8}, [r12], r1 - vld1.u8 {d9}, [r3], r1 - vld1.u8 {d10}, [r12], r1 - vld1.u8 {d11}, [r3], r1 - vld1.u8 {d12}, [r12], r1 - vld1.u8 {d13}, [r3], r1 - vld1.u8 {d14}, [r12], r1 - vld1.u8 {d15}, [r3], r1 - vld1.u8 {d16}, [r12], r1 - vld1.u8 {d17}, [r3], r1 - vld1.u8 {d18}, [r12], r1 - vld1.u8 {d19}, [r3], r1 - vld1.u8 {d20}, [r12] - vld1.u8 {d21}, [r3] - - ldr r12, [sp, #4] ; load thresh - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vdup.u8 q2, r12 ; duplicate thresh - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - bl vp9_loop_filter_neon - - vswp d12, d11 - vswp d16, d13 - vswp d14, d12 - vswp d16, d15 - - sub r0, r0, #2 - sub r2, r2, #2 - - ;store op1, op0, oq0, oq1 - vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1 - vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1 - vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1 - vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1 - vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1 - vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], r1 - vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1 - vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1 - vst4.8 {d10[4], d11[4], d12[4], d13[4]}, 
[r0], r1 - vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1 - vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1 - vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1 - vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1 - vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1 - vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0] - vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2] - - pop {pc} - ENDP ; |vp9_loop_filter_vertical_edge_uv_neon| - -; void vp9_loop_filter_neon(); -; This is a helper function for the loopfilters. The individual functions do the -; necessary load, transpose (if necessary) and store. - -; r0-r3 PRESERVE -; q0 flimit -; q1 limit -; q2 thresh -; q3 p3 -; q4 p2 -; q5 p1 -; q6 p0 -; q7 q0 -; q8 q1 -; q9 q2 -; q10 q3 -|vp9_loop_filter_neon| PROC - - ; vp9_filter_mask - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q4, q10, q9 ; abs(q3 - q2) - - vmax.u8 q11, q11, q12 - vmax.u8 q12, q13, q14 - vmax.u8 q3, q3, q4 - vmax.u8 q15, q11, q12 - - vabd.u8 q9, q6, q7 ; abs(p0 - q0) - - ; vp8_hevmask - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - vmax.u8 q15, q15, q3 - - vmov.u8 q10, #0x80 ; 0x80 - - vabd.u8 q2, q5, q8 ; a = abs(p1 - q1) - vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2 - - vcge.u8 q15, q1, q15 - - ; vp9_filter() function - ; convert to signed - veor q7, q7, q10 ; qs0 - vshr.u8 q2, q2, #1 ; a = a / 2 - veor q6, q6, q10 ; ps0 - - veor q5, q5, q10 ; ps1 - vqadd.u8 q9, q9, q2 ; a = b + a - - veor q8, q8, q10 ; qs1 - - vmov.u8 q10, #3 ; #3 - - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q11, d15, d13 - - vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1 - - vmovl.u8 q4, d20 - - vqsub.s8 q1, q5, q8 ; vp9_filter = clamp(ps1-qs1) - vorr q14, q13, q14 ; vp8_hevmask - - vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) - vmul.i16 q11, q11, q4 - - vand q1, q1, q14 ; vp9_filter &= hev - vand q15, q15, q9 ; vp9_filter_mask - - vaddw.s8 q2, q2, d2 - vaddw.s8 q11, q11, d3 - - vmov.u8 q9, #4 ; #4 - - ; vp9_filter = clamp(vp9_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d2, q2 - vqmovn.s16 d3, q11 - vand q1, q1, q15 ; vp9_filter &= mask - - vqadd.s8 q2, q1, q10 ; Filter2 = clamp(vp9_filter+3) - vqadd.s8 q1, q1, q9 ; Filter1 = clamp(vp9_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q1, q1, #3 ; Filter1 >>= 3 - - - vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2) - vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1) - - ; outer tap adjustments: ++vp9_filter >> 1 - vrshr.s8 q1, q1, #1 - vbic q1, q1, q14 ; vp9_filter &= ~hev - vmov.u8 q0, #0x80 ; 0x80 - vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp9_filter) - vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp9_filter) - - veor q6, q11, q0 ; *op0 = u^0x80 - veor q7, q10, q0 ; *oq0 = u^0x80 - veor q5, q13, q0 ; *op1 = u^0x80 - veor q8, q12, q0 ; *oq1 = u^0x80 - - bx lr - ENDP ; |vp9_loop_filter_neon| - -;----------------- - - END diff --git a/vp9/common/arm/neon/vp9_loopfiltersimplehorizontaledge_neon.asm b/vp9/common/arm/neon/vp9_loopfiltersimplehorizontaledge_neon.asm deleted file mode 100644 index eb07ce0d5..000000000 --- a/vp9/common/arm/neon/vp9_loopfiltersimplehorizontaledge_neon.asm +++ /dev/null @@ -1,117 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree.
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - ;EXPORT |vp9_loop_filter_simple_horizontal_edge_neon| - EXPORT |vp9_loop_filter_bhs_neon| - EXPORT |vp9_loop_filter_mbhs_neon| - ARM - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *s, PRESERVE -; r1 int p, PRESERVE -; q1 limit, PRESERVE - -|vp9_loop_filter_simple_horizontal_edge_neon| PROC - - sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines - - vld1.u8 {q7}, [r0@128], r1 ; q0 - vld1.u8 {q5}, [r3@128], r1 ; p0 - vld1.u8 {q8}, [r0@128] ; q1 - vld1.u8 {q6}, [r3@128] ; p1 - - vabd.u8 q15, q6, q7 ; abs(p0 - q0) - vabd.u8 q14, q5, q8 ; abs(p1 - q1) - - vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2 - vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2 - vmov.u8 q0, #0x80 ; 0x80 - vmov.s16 q13, #3 - vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value - - vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1 - - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q3, d15, d13 - - vqsub.s8 q4, q5, q8 ; q4: vp9_filter = vp9_signed_char_clamp(ps1-qs1) - - vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0) - vmul.s16 q3, q3, q13 - - vmov.u8 q10, #0x03 ; 0x03 - vmov.u8 q9, #0x04 ; 0x04 - - vaddw.s8 q2, q2, d8 ; vp9_filter + 3 * ( qs0 - ps0) - vaddw.s8 q3, q3, d9 - - vqmovn.s16 d8, q2 ; vp9_filter = vp9_signed_char_clamp(vp9_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d9, q3 - - vand q14, q4, q15 ; vp9_filter &= mask - - vqadd.s8 q2, q14, q10 ; Filter2 = vp9_signed_char_clamp(vp9_filter+3) - vqadd.s8 q3, q14, q9 ; Filter1 = vp9_signed_char_clamp(vp9_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q4, q3, #3 ; Filter1 >>= 3 - - sub r0, r0, r1 - - ;calculate output - vqadd.s8 q11, q6, q2 ; u = vp9_signed_char_clamp(ps0 + Filter2) - vqsub.s8 q10, q7, q4 ; u = vp9_signed_char_clamp(qs0 - Filter1) - - veor q6, q11, q0 ; *op0 = u^0x80 - veor q7, q10, q0 ; *oq0 = u^0x80 - - vst1.u8 {q6}, [r3@128] ; store op0 - vst1.u8 {q7}, [r0@128] ; store oq0 - - bx lr - ENDP ; |vp9_loop_filter_simple_horizontal_edge_neon| - -; r0 unsigned char *y -; r1 int ystride -; r2 const unsigned char *blimit - -|vp9_loop_filter_bhs_neon| PROC - push {r4, lr} - ldrb r3, [r2] ; load blim from mem - vdup.s8 q1, r3 ; duplicate blim - - add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride - bl vp9_loop_filter_simple_horizontal_edge_neon - ; vp9_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1 - add r0, r0, r1, lsl #2 ; src = y_ptr + 8 * y_stride - bl vp9_loop_filter_simple_horizontal_edge_neon - add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride - pop {r4, lr} - b vp9_loop_filter_simple_horizontal_edge_neon - ENDP ;|vp9_loop_filter_bhs_neon| - -; r0 unsigned char *y -; r1 int ystride -; r2 const unsigned char *blimit - -|vp9_loop_filter_mbhs_neon| PROC - ldrb r3, [r2] ; load mblim from mem - vdup.s8 q1, r3 ; duplicate mblim - b vp9_loop_filter_simple_horizontal_edge_neon - ENDP ;|vp9_loop_filter_mbhs_neon| - - END diff --git a/vp9/common/arm/neon/vp9_loopfiltersimpleverticaledge_neon.asm b/vp9/common/arm/neon/vp9_loopfiltersimpleverticaledge_neon.asm deleted file mode 100644 index d5cf8c2b5..000000000 ---
a/vp9/common/arm/neon/vp9_loopfiltersimpleverticaledge_neon.asm +++ /dev/null @@ -1,154 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - ;EXPORT |vp9_loop_filter_simple_vertical_edge_neon| - EXPORT |vp9_loop_filter_bvs_neon| - EXPORT |vp9_loop_filter_mbvs_neon| - ARM - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *s, PRESERVE -; r1 int p, PRESERVE -; q1 limit, PRESERVE - -|vp9_loop_filter_simple_vertical_edge_neon| PROC - sub r0, r0, #2 ; move src pointer down by 2 columns - add r12, r1, r1 - add r3, r0, r1 - - vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12 - vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12 - vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12 - vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12 - vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12 - vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12 - vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12 - vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12 - - vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12 - vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r3], r12 - vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r12 - vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r3], r12 - vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r12 - vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r3], r12 - vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r12 - vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r3] - - vswp d7, d10 - vswp d12, d9 - - ;vp9_filter_mask() function - ;vp8_hevmask() function - sub r0, r0, r1, lsl #4 - vabd.u8 q15, q5, q4 ; abs(p0 - q0) - vabd.u8 q14, q3, q6 ; abs(p1 - q1) - - vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2 - vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2 - vmov.u8 q0, #0x80 ; 0x80 - vmov.s16 q11, #3 - vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - - veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value - veor q5, q5, q0 ; ps0: p0 offset to convert to a signed value - veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value - veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value - - vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1 - - vsubl.s8 q2, d8, d10 ; ( qs0 - ps0) - vsubl.s8 q13, d9, d11 - - vqsub.s8 q14, q3, q6 ; vp9_filter = vp9_signed_char_clamp(ps1-qs1) - - vmul.s16 q2, q2, q11 ; 3 * ( qs0 - ps0) - vmul.s16 q13, q13, q11 - - vmov.u8 q11, #0x03 ; 0x03 - vmov.u8 q12, #0x04 ; 0x04 - - vaddw.s8 q2, q2, d28 ; vp9_filter + 3 * ( qs0 - ps0) - vaddw.s8 q13, q13, d29 - - vqmovn.s16 d28, q2 ; vp9_filter = vp9_signed_char_clamp(vp9_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d29, q13 - - add r0, r0, #1 - add r3, r0, r1 - - vand q14, q14, q15 ; vp9_filter &= mask - - vqadd.s8 q2, q14, q11 ; Filter2 = vp9_signed_char_clamp(vp9_filter+3) - vqadd.s8 q3, q14, q12 ; Filter1 = vp9_signed_char_clamp(vp9_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q14, q3, #3 ; Filter1 >>= 3 - - ;calculate output - vqadd.s8 q11, q5, q2 ; u = vp9_signed_char_clamp(ps0 + Filter2) - vqsub.s8 q10, q4, q14 ; u = vp9_signed_char_clamp(qs0 - Filter1) - - veor q6, q11, q0 ; *op0 = u^0x80 - veor q7, q10, q0 ; *oq0 = u^0x80 - add r12, r1, r1 - vswp d13, d14 - - ;store op1, op0, oq0, oq1 - vst2.8 {d12[0], d13[0]}, [r0], r12 - vst2.8 {d12[1], 
d13[1]}, [r3], r12 - vst2.8 {d12[2], d13[2]}, [r0], r12 - vst2.8 {d12[3], d13[3]}, [r3], r12 - vst2.8 {d12[4], d13[4]}, [r0], r12 - vst2.8 {d12[5], d13[5]}, [r3], r12 - vst2.8 {d12[6], d13[6]}, [r0], r12 - vst2.8 {d12[7], d13[7]}, [r3], r12 - vst2.8 {d14[0], d15[0]}, [r0], r12 - vst2.8 {d14[1], d15[1]}, [r3], r12 - vst2.8 {d14[2], d15[2]}, [r0], r12 - vst2.8 {d14[3], d15[3]}, [r3], r12 - vst2.8 {d14[4], d15[4]}, [r0], r12 - vst2.8 {d14[5], d15[5]}, [r3], r12 - vst2.8 {d14[6], d15[6]}, [r0], r12 - vst2.8 {d14[7], d15[7]}, [r3] - - bx lr - ENDP ; |vp9_loop_filter_simple_vertical_edge_neon| - -; r0 unsigned char *y -; r1 int ystride -; r2 const unsigned char *blimit - -|vp9_loop_filter_bvs_neon| PROC - push {r4, lr} - ldrb r3, [r2] ; load blim from mem - mov r4, r0 - add r0, r0, #4 - vdup.s8 q1, r3 ; duplicate blim - bl vp9_loop_filter_simple_vertical_edge_neon - ; vp9_loop_filter_simple_vertical_edge_neon preserves r1 and q1 - add r0, r4, #8 - bl vp9_loop_filter_simple_vertical_edge_neon - add r0, r4, #12 - pop {r4, lr} - b vp9_loop_filter_simple_vertical_edge_neon - ENDP ;|vp9_loop_filter_bvs_neon| - -; r0 unsigned char *y -; r1 int ystride -; r2 const unsigned char *blimit - -|vp9_loop_filter_mbvs_neon| PROC - ldrb r3, [r2] ; load mblim from mem - vdup.s8 q1, r3 ; duplicate mblim - b vp9_loop_filter_simple_vertical_edge_neon - ENDP ;|vp9_loop_filter_mbvs_neon| - END diff --git a/vp9/common/arm/neon/vp9_mbloopfilter_neon.asm b/vp9/common/arm/neon/vp9_mbloopfilter_neon.asm deleted file mode 100644 index 19b67f47d..000000000 --- a/vp9/common/arm/neon/vp9_mbloopfilter_neon.asm +++ /dev/null @@ -1,469 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree.
-; - - - EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon| - EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon| - EXPORT |vp8_mbloop_filter_vertical_edge_y_neon| - EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon| - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch, -; const unsigned char *blimit, -; const unsigned char *limit, -; const unsigned char *thresh) -; r0 unsigned char *src, -; r1 int pitch, -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, -|vp8_mbloop_filter_horizontal_edge_y_neon| PROC - push {lr} - add r1, r1, r1 ; double stride - ldr r12, [sp, #4] ; load thresh - sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines - vdup.u8 q2, r12 ; thresh - add r12, r0, r1, lsr #1 ; move src pointer up by 1 line - - vld1.u8 {q3}, [r0@128], r1 ; p3 - vld1.u8 {q4}, [r12@128], r1 ; p2 - vld1.u8 {q5}, [r0@128], r1 ; p1 - vld1.u8 {q6}, [r12@128], r1 ; p0 - vld1.u8 {q7}, [r0@128], r1 ; q0 - vld1.u8 {q8}, [r12@128], r1 ; q1 - vld1.u8 {q9}, [r0@128], r1 ; q2 - vld1.u8 {q10}, [r12@128], r1 ; q3 - - bl vp8_mbloop_filter_neon - - sub r12, r12, r1, lsl #2 - add r0, r12, r1, lsr #1 - - vst1.u8 {q4}, [r12@128],r1 ; store op2 - vst1.u8 {q5}, [r0@128],r1 ; store op1 - vst1.u8 {q6}, [r12@128], r1 ; store op0 - vst1.u8 {q7}, [r0@128],r1 ; store oq0 - vst1.u8 {q8}, [r12@128] ; store oq1 - vst1.u8 {q9}, [r0@128] ; store oq2 - - pop {pc} - ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon| - -; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch, -; const unsigned char *blimit, -; const unsigned char *limit, -; const unsigned char *thresh, -; unsigned char *v) -; r0 unsigned char *u, -; r1 int pitch, -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, -; sp+4 unsigned char *v - -|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC - push {lr} - ldr r12, [sp, #4] ; load thresh - sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines - vdup.u8 q2, r12 ; thresh - ldr r12, [sp, #8] ; load v ptr - sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines - - vld1.u8 {d6}, [r0@64], r1 ; p3 - vld1.u8 {d7}, [r12@64], r1 ; p3 - vld1.u8 {d8}, [r0@64], r1 ; p2 - vld1.u8 {d9}, [r12@64], r1 ; p2 - vld1.u8 {d10}, [r0@64], r1 ; p1 - vld1.u8 {d11}, [r12@64], r1 ; p1 - vld1.u8 {d12}, [r0@64], r1 ; p0 - vld1.u8 {d13}, [r12@64], r1 ; p0 - vld1.u8 {d14}, [r0@64], r1 ; q0 - vld1.u8 {d15}, [r12@64], r1 ; q0 - vld1.u8 {d16}, [r0@64], r1 ; q1 - vld1.u8 {d17}, [r12@64], r1 ; q1 - vld1.u8 {d18}, [r0@64], r1 ; q2 - vld1.u8 {d19}, [r12@64], r1 ; q2 - vld1.u8 {d20}, [r0@64], r1 ; q3 - vld1.u8 {d21}, [r12@64], r1 ; q3 - - bl vp8_mbloop_filter_neon - - sub r0, r0, r1, lsl #3 - sub r12, r12, r1, lsl #3 - - add r0, r0, r1 - add r12, r12, r1 - - vst1.u8 {d8}, [r0@64], r1 ; store u op2 - vst1.u8 {d9}, [r12@64], r1 ; store v op2 - vst1.u8 {d10}, [r0@64], r1 ; store u op1 - vst1.u8 {d11}, [r12@64], r1 ; store v op1 - vst1.u8 {d12}, [r0@64], r1 ; store u op0 - vst1.u8 {d13}, [r12@64], r1 ; store v op0 - vst1.u8 {d14}, [r0@64], r1 ; store u oq0 - vst1.u8 {d15}, [r12@64], r1 ; store v oq0 - vst1.u8 {d16}, [r0@64], r1 ; store u oq1 - vst1.u8 {d17}, [r12@64], r1 ; store v oq1 - vst1.u8 {d18}, [r0@64], r1 ; store u oq2 - vst1.u8 {d19}, [r12@64], r1 ; store v oq2 - - pop {pc} - ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon| - -; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch, -; const unsigned char *blimit, -; const unsigned char *limit, -; const unsigned char 
*thresh) -; r0 unsigned char *src, -; r1 int pitch, -; r2 unsigned char blimit -; r3 unsigned char limit -; sp unsigned char thresh, -|vp8_mbloop_filter_vertical_edge_y_neon| PROC - push {lr} - ldr r12, [sp, #4] ; load thresh - sub r0, r0, #4 ; move src pointer down by 4 columns - vdup.s8 q2, r12 ; thresh - add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines - - vld1.u8 {d6}, [r0], r1 ; load first 8-line src data - vld1.u8 {d7}, [r12], r1 ; load second 8-line src data - vld1.u8 {d8}, [r0], r1 - vld1.u8 {d9}, [r12], r1 - vld1.u8 {d10}, [r0], r1 - vld1.u8 {d11}, [r12], r1 - vld1.u8 {d12}, [r0], r1 - vld1.u8 {d13}, [r12], r1 - vld1.u8 {d14}, [r0], r1 - vld1.u8 {d15}, [r12], r1 - vld1.u8 {d16}, [r0], r1 - vld1.u8 {d17}, [r12], r1 - vld1.u8 {d18}, [r0], r1 - vld1.u8 {d19}, [r12], r1 - vld1.u8 {d20}, [r0], r1 - vld1.u8 {d21}, [r12], r1 - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - sub r0, r0, r1, lsl #3 - - bl vp8_mbloop_filter_neon - - sub r12, r12, r1, lsl #3 - - ;transpose to 16x8 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - ;store op2, op1, op0, oq0, oq1, oq2 - vst1.8 {d6}, [r0], r1 - vst1.8 {d7}, [r12], r1 - vst1.8 {d8}, [r0], r1 - vst1.8 {d9}, [r12], r1 - vst1.8 {d10}, [r0], r1 - vst1.8 {d11}, [r12], r1 - vst1.8 {d12}, [r0], r1 - vst1.8 {d13}, [r12], r1 - vst1.8 {d14}, [r0], r1 - vst1.8 {d15}, [r12], r1 - vst1.8 {d16}, [r0], r1 - vst1.8 {d17}, [r12], r1 - vst1.8 {d18}, [r0], r1 - vst1.8 {d19}, [r12], r1 - vst1.8 {d20}, [r0] - vst1.8 {d21}, [r12] - - pop {pc} - ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon| - -; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch, -; const unsigned char *blimit, -; const unsigned char *limit, -; const unsigned char *thresh, -; unsigned char *v) -; r0 unsigned char *u, -; r1 int pitch, -; r2 const signed char *flimit, -; r3 const signed char *limit, -; sp const signed char *thresh, -; sp+4 unsigned char *v -|vp8_mbloop_filter_vertical_edge_uv_neon| PROC - push {lr} - ldr r12, [sp, #4] ; load thresh - sub r0, r0, #4 ; move u pointer down by 4 columns - vdup.u8 q2, r12 ; thresh - ldr r12, [sp, #8] ; load v ptr - sub r12, r12, #4 ; move v pointer down by 4 columns - - vld1.u8 {d6}, [r0], r1 ;load u data - vld1.u8 {d7}, [r12], r1 ;load v data - vld1.u8 {d8}, [r0], r1 - vld1.u8 {d9}, [r12], r1 - vld1.u8 {d10}, [r0], r1 - vld1.u8 {d11}, [r12], r1 - vld1.u8 {d12}, [r0], r1 - vld1.u8 {d13}, [r12], r1 - vld1.u8 {d14}, [r0], r1 - vld1.u8 {d15}, [r12], r1 - vld1.u8 {d16}, [r0], r1 - vld1.u8 {d17}, [r12], r1 - vld1.u8 {d18}, [r0], r1 - vld1.u8 {d19}, [r12], r1 - vld1.u8 {d20}, [r0], r1 - vld1.u8 {d21}, [r12], r1 - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - sub r0, r0, r1, lsl #3 - - bl vp8_mbloop_filter_neon - - sub r12, r12, r1, lsl #3 - - ;transpose to 16x8 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - ;store op2, 
op1, op0, oq0, oq1, oq2 - vst1.8 {d6}, [r0], r1 - vst1.8 {d7}, [r12], r1 - vst1.8 {d8}, [r0], r1 - vst1.8 {d9}, [r12], r1 - vst1.8 {d10}, [r0], r1 - vst1.8 {d11}, [r12], r1 - vst1.8 {d12}, [r0], r1 - vst1.8 {d13}, [r12], r1 - vst1.8 {d14}, [r0], r1 - vst1.8 {d15}, [r12], r1 - vst1.8 {d16}, [r0], r1 - vst1.8 {d17}, [r12], r1 - vst1.8 {d18}, [r0], r1 - vst1.8 {d19}, [r12], r1 - vst1.8 {d20}, [r0] - vst1.8 {d21}, [r12] - - pop {pc} - ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon| - -; void vp8_mbloop_filter_neon() -; This is a helper function for the macroblock loopfilters. The individual -; functions do the necessary load, transpose (if necessary), preserve (if -; necessary) and store. - -; r0,r1 PRESERVE -; r2 mblimit -; r3 limit - -; q2 thresh -; q3 p3 PRESERVE -; q4 p2 -; q5 p1 -; q6 p0 -; q7 q0 -; q8 q1 -; q9 q2 -; q10 q3 PRESERVE - -|vp8_mbloop_filter_neon| PROC - - ; vp9_filter_mask - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q1, q9, q8 ; abs(q2 - q1) - vabd.u8 q0, q10, q9 ; abs(q3 - q2) - - vmax.u8 q11, q11, q12 - vmax.u8 q12, q13, q14 - vmax.u8 q1, q1, q0 - vmax.u8 q15, q11, q12 - - vabd.u8 q12, q6, q7 ; abs(p0 - q0) - - ; vp8_hevmask - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1 - vmax.u8 q15, q15, q1 - - vdup.u8 q1, r3 ; limit - vdup.u8 q2, r2 ; mblimit - - vmov.u8 q0, #0x80 ; 0x80 - - vcge.u8 q15, q1, q15 - - vabd.u8 q1, q5, q8 ; a = abs(p1 - q1) - vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2 - vmov.u16 q11, #3 ; #3 - - ; vp9_filter - ; convert to signed - veor q7, q7, q0 ; qs0 - vshr.u8 q1, q1, #1 ; a = a / 2 - veor q6, q6, q0 ; ps0 - veor q5, q5, q0 ; ps1 - - vqadd.u8 q12, q12, q1 ; a = b + a - - veor q8, q8, q0 ; qs1 - veor q4, q4, q0 ; ps2 - veor q9, q9, q0 ; qs2 - - vorr q14, q13, q14 ; vp8_hevmask - - vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1 - - vsubl.s8 q2, d14, d12 ; qs0 - ps0 - vsubl.s8 q13, d15, d13 - - vqsub.s8 q1, q5, q8 ; vp9_filter = clamp(ps1-qs1) - - vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0) - - vand q15, q15, q12 ; vp9_filter_mask - - vmul.i16 q13, q13, q11 - - vmov.u8 q12, #3 ; #3 - - vaddw.s8 q2, q2, d2 ; vp9_filter + 3 * ( qs0 - ps0) - vaddw.s8 q13, q13, d3 - - vmov.u8 q11, #4 ; #4 - - ; vp9_filter = clamp(vp9_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d2, q2 - vqmovn.s16 d3, q13 - - vand q1, q1, q15 ; vp9_filter &= mask - - vmov.u16 q15, #63 ; #63 - - vand q13, q1, q14 ; Filter2 &= hev - - vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4) - vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3) - - vmov q0, q15 - - vshr.s8 q2, q2, #3 ; Filter1 >>= 3 - vshr.s8 q13, q13, #3 ; Filter2 >>= 3 - - vmov q11, q15 - vmov q12, q15 - - vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1) - - vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2) - - vbic q1, q1, q14 ; vp9_filter &= ~hev - - ; roughly 1/7th difference across boundary - ; roughly 2/7th difference across boundary - ; roughly 3/7th difference across boundary - - vmov.u8 d5, #9 ; #9 - vmov.u8 d4, #18 ; #18 - - vmov q13, q15 - vmov q14, q15 - - vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9 - vmlal.s8 q11, d3, d5 - vmov.u8 d5, #27 ; #27 - vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18 - vmlal.s8 q13, d3, d4 - vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27 - vmlal.s8 q15, d3, d5 - - vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7) - vqshrn.s16 d1, q11, #7 - vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7) - vqshrn.s16 d25, q13, #7 - 
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7) - vqshrn.s16 d29, q15, #7 - - vmov.u8 q1, #0x80 ; 0x80 - - vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u) - vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u) - vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u) - vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u) - vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u) - vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u) - - veor q9, q11, q1 ; *oq2 = s^0x80 - veor q4, q0, q1 ; *op2 = s^0x80 - veor q8, q13, q1 ; *oq1 = s^0x80 - veor q5, q12, q1 ; *op1 = s^0x80 - veor q7, q15, q1 ; *oq0 = s^0x80 - veor q6, q14, q1 ; *op0 = s^0x80 - - bx lr - ENDP ; |vp8_mbloop_filter_neon| - -;----------------- - - END diff --git a/vp9/common/arm/neon/vp9_recon16x16mb_neon.asm b/vp9/common/arm/neon/vp9_recon16x16mb_neon.asm deleted file mode 100644 index 3f1a30f48..000000000 --- a/vp9/common/arm/neon/vp9_recon16x16mb_neon.asm +++ /dev/null @@ -1,131 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_recon16x16mb_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *pred_ptr, -; r1 short *diff_ptr, -; r2 unsigned char *dst_ptr, -; r3 int ystride, -; stack unsigned char *udst_ptr, -; stack unsigned char *vdst_ptr - -|vp8_recon16x16mb_neon| PROC - mov r12, #4 ;loop counter for Y loop - -recon16x16mb_loop_y - vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr - vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr - vld1.u8 {q14, q15}, [r0]! - vld1.16 {q10, q11}, [r1]! - - vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits - vmovl.u8 q1, d25 - vmovl.u8 q2, d26 - vmovl.u8 q3, d27 - vmovl.u8 q4, d28 - vmovl.u8 q5, d29 - vmovl.u8 q6, d30 - vld1.16 {q12, q13}, [r1]! - vmovl.u8 q7, d31 - vld1.16 {q14, q15}, [r1]! - - pld [r0] - pld [r1] - pld [r1, #64] - - vadd.s16 q0, q0, q8 ;add Diff data and Pred data together - vadd.s16 q1, q1, q9 - vadd.s16 q2, q2, q10 - vadd.s16 q3, q3, q11 - vadd.s16 q4, q4, q12 - vadd.s16 q5, q5, q13 - vadd.s16 q6, q6, q14 - vadd.s16 q7, q7, q15 - - vqmovun.s16 d0, q0 ;CLAMP() saturation - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q2 - vqmovun.s16 d3, q3 - vqmovun.s16 d4, q4 - vqmovun.s16 d5, q5 - vst1.u8 {q0}, [r2], r3 ;store result - vqmovun.s16 d6, q6 - vst1.u8 {q1}, [r2], r3 - vqmovun.s16 d7, q7 - vst1.u8 {q2}, [r2], r3 - subs r12, r12, #1 - - moveq r12, #2 ;loop counter for UV loop - - vst1.u8 {q3}, [r2], r3 - bne recon16x16mb_loop_y - - mov r3, r3, lsr #1 ;uv_stride = ystride>>1 - ldr r2, [sp] ;load upred_ptr - -recon16x16mb_loop_uv - vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr - vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr - vld1.u8 {q14, q15}, [r0]! - vld1.16 {q10, q11}, [r1]! - - vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits - vmovl.u8 q1, d25 - vmovl.u8 q2, d26 - vmovl.u8 q3, d27 - vmovl.u8 q4, d28 - vmovl.u8 q5, d29 - vmovl.u8 q6, d30 - vld1.16 {q12, q13}, [r1]! - vmovl.u8 q7, d31 - vld1.16 {q14, q15}, [r1]!
- - vadd.s16 q0, q0, q8 ;add Diff data and Pred data together - vadd.s16 q1, q1, q9 - vadd.s16 q2, q2, q10 - vadd.s16 q3, q3, q11 - vadd.s16 q4, q4, q12 - vadd.s16 q5, q5, q13 - vadd.s16 q6, q6, q14 - - vqmovun.s16 d0, q0 ;CLAMP() saturation - vadd.s16 q7, q7, q15 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q2 - vqmovun.s16 d3, q3 - vst1.u8 {d0}, [r2], r3 ;store result - vqmovun.s16 d4, q4 - vst1.u8 {d1}, [r2], r3 - vqmovun.s16 d5, q5 - vst1.u8 {d2}, [r2], r3 - vqmovun.s16 d6, q6 - vst1.u8 {d3}, [r2], r3 - vqmovun.s16 d7, q7 - vst1.u8 {d4}, [r2], r3 - subs r12, r12, #1 - - vst1.u8 {d5}, [r2], r3 - vst1.u8 {d6}, [r2], r3 - vst1.u8 {d7}, [r2], r3 - - ldrne r2, [sp, #4] ;load vpred_ptr - bne recon16x16mb_loop_uv - - bx lr - - ENDP - END diff --git a/vp9/common/arm/neon/vp9_recon2b_neon.asm b/vp9/common/arm/neon/vp9_recon2b_neon.asm deleted file mode 100644 index 99b251c91..000000000 --- a/vp9/common/arm/neon/vp9_recon2b_neon.asm +++ /dev/null @@ -1,54 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_recon2b_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *pred_ptr, -; r1 short *diff_ptr, -; r2 unsigned char *dst_ptr, -; r3 int stride - -|vp8_recon2b_neon| PROC - vld1.u8 {q8, q9}, [r0] ;load data from pred_ptr - vld1.16 {q4, q5}, [r1]! ;load data from diff_ptr - - vmovl.u8 q0, d16 ;modify Pred data from 8 bits to 16 bits - vld1.16 {q6, q7}, [r1]! - vmovl.u8 q1, d17 - vmovl.u8 q2, d18 - vmovl.u8 q3, d19 - - vadd.s16 q0, q0, q4 ;add Diff data and Pred data together - vadd.s16 q1, q1, q5 - vadd.s16 q2, q2, q6 - vadd.s16 q3, q3, q7 - - vqmovun.s16 d0, q0 ;CLAMP() saturation - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q2 - vqmovun.s16 d3, q3 - add r0, r2, r3 - - vst1.u8 {d0}, [r2] ;store result - vst1.u8 {d1}, [r0], r3 - add r2, r0, r3 - vst1.u8 {d2}, [r0] - vst1.u8 {d3}, [r2], r3 - - bx lr - - ENDP - END diff --git a/vp9/common/arm/neon/vp9_recon4b_neon.asm b/vp9/common/arm/neon/vp9_recon4b_neon.asm deleted file mode 100644 index 991727746..000000000 --- a/vp9/common/arm/neon/vp9_recon4b_neon.asm +++ /dev/null @@ -1,69 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_recon4b_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *pred_ptr, -; r1 short *diff_ptr, -; r2 unsigned char *dst_ptr, -; r3 int stride - -|vp8_recon4b_neon| PROC - vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr - vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr - vld1.u8 {q14, q15}, [r0] - vld1.16 {q10, q11}, [r1]! - - vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits - vmovl.u8 q1, d25 - vmovl.u8 q2, d26 - vmovl.u8 q3, d27 - vmovl.u8 q4, d28 - vmovl.u8 q5, d29 - vmovl.u8 q6, d30 - vld1.16 {q12, q13}, [r1]! 
- vmovl.u8 q7, d31 - vld1.16 {q14, q15}, [r1] - - vadd.s16 q0, q0, q8 ;add Diff data and Pred data together - vadd.s16 q1, q1, q9 - vadd.s16 q2, q2, q10 - vadd.s16 q3, q3, q11 - vadd.s16 q4, q4, q12 - vadd.s16 q5, q5, q13 - vadd.s16 q6, q6, q14 - vadd.s16 q7, q7, q15 - - vqmovun.s16 d0, q0 ;CLAMP() saturation - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q2 - vqmovun.s16 d3, q3 - vqmovun.s16 d4, q4 - vqmovun.s16 d5, q5 - vqmovun.s16 d6, q6 - vqmovun.s16 d7, q7 - add r0, r2, r3 - - vst1.u8 {q0}, [r2] ;store result - vst1.u8 {q1}, [r0], r3 - add r2, r0, r3 - vst1.u8 {q2}, [r0] - vst1.u8 {q3}, [r2], r3 - - bx lr - - ENDP - END diff --git a/vp9/common/arm/neon/vp9_recon_neon.c b/vp9/common/arm/neon/vp9_recon_neon.c deleted file mode 100644 index 1bf7a29bd..000000000 --- a/vp9/common/arm/neon/vp9_recon_neon.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "vp9/common/recon.h" -#include "vp9/common/vp9_blockd.h" - -extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr); - -void vp8_recon_mb_neon(MACROBLOCKD *xd) { - unsigned char *pred_ptr = &xd->predictor[0]; - short *diff_ptr = &xd->diff[0]; - unsigned char *dst_ptr = xd->dst.y_buffer; - unsigned char *udst_ptr = xd->dst.u_buffer; - unsigned char *vdst_ptr = xd->dst.v_buffer; - int ystride = xd->dst.y_stride; - /*int uv_stride = xd->dst.uv_stride;*/ - - vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, - udst_ptr, vdst_ptr); -} diff --git a/vp9/common/arm/neon/vp9_reconb_neon.asm b/vp9/common/arm/neon/vp9_reconb_neon.asm deleted file mode 100644 index 288c0ef01..000000000 --- a/vp9/common/arm/neon/vp9_reconb_neon.asm +++ /dev/null @@ -1,61 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_recon_b_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *pred_ptr, -; r1 short *diff_ptr, -; r2 unsigned char *dst_ptr, -; r3 int stride - -|vp8_recon_b_neon| PROC - mov r12, #16 - - vld1.u8 {d28}, [r0], r12 ;load 4 data/line from pred_ptr - vld1.16 {q10, q11}, [r1]! ;load data from diff_ptr - vld1.u8 {d29}, [r0], r12 - vld1.16 {q11, q12}, [r1]! - vld1.u8 {d30}, [r0], r12 - vld1.16 {q12, q13}, [r1]! 
- vld1.u8 {d31}, [r0], r12 - vld1.16 {q13}, [r1] - - vmovl.u8 q0, d28 ;modify Pred data from 8 bits to 16 bits - vmovl.u8 q1, d29 ;Pred data in d0, d2, d4, d6 - vmovl.u8 q2, d30 - vmovl.u8 q3, d31 - - vadd.s16 d0, d0, d20 ;add Diff data and Pred data together - vadd.s16 d2, d2, d22 - vadd.s16 d4, d4, d24 - vadd.s16 d6, d6, d26 - - vqmovun.s16 d0, q0 ;CLAMP() saturation - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q2 - vqmovun.s16 d3, q3 - add r1, r2, r3 - - vst1.32 {d0[0]}, [r2] ;store result - vst1.32 {d1[0]}, [r1], r3 - add r2, r1, r3 - vst1.32 {d2[0]}, [r1] - vst1.32 {d3[0]}, [r2], r3 - - bx lr - - ENDP - END diff --git a/vp9/common/arm/neon/vp9_save_neon_reg.asm b/vp9/common/arm/neon/vp9_save_neon_reg.asm deleted file mode 100644 index 71c3e7077..000000000 --- a/vp9/common/arm/neon/vp9_save_neon_reg.asm +++ /dev/null @@ -1,36 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_push_neon| - EXPORT |vp9_pop_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -|vp9_push_neon| PROC - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - -|vp9_pop_neon| PROC - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - - END - diff --git a/vp9/common/arm/neon/vp9_shortidct4x4llm_1_neon.asm b/vp9/common/arm/neon/vp9_shortidct4x4llm_1_neon.asm deleted file mode 100644 index d7bdbae75..000000000 --- a/vp9/common/arm/neon/vp9_shortidct4x4llm_1_neon.asm +++ /dev/null @@ -1,67 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8_short_idct4x4llm_1_neon| - EXPORT |vp8_dc_only_idct_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch); -; r0 short *input; -; r1 short *output; -; r2 int pitch; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp8_short_idct4x4llm_1_neon| PROC - vld1.16 {d0[]}, [r0] ;load input[0] - - add r3, r1, r2 - add r12, r3, r2 - - vrshr.s16 d0, d0, #3 - - add r0, r12, r2 - - vst1.16 {d0}, [r1] - vst1.16 {d0}, [r3] - vst1.16 {d0}, [r12] - vst1.16 {d0}, [r0] - - bx lr - ENDP - -;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;void vp8_dc_only_idct_c(short input_dc, short *output, int pitch); -; r0 short input_dc; -; r1 short *output; -; r2 int pitch; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp8_dc_only_idct_neon| PROC - vdup.16 d0, r0 - - add r3, r1, r2 - add r12, r3, r2 - - vrshr.s16 d0, d0, #3 - - add r0, r12, r2 - - vst1.16 {d0}, [r1] - vst1.16 {d0}, [r3] - vst1.16 {d0}, [r12] - vst1.16 {d0}, [r0] - - bx lr - - ENDP - END diff --git a/vp9/common/arm/neon/vp9_shortidct4x4llm_neon.asm b/vp9/common/arm/neon/vp9_shortidct4x4llm_neon.asm deleted file mode 100644 index b74c31521..000000000 --- a/vp9/common/arm/neon/vp9_shortidct4x4llm_neon.asm +++ /dev/null @@ -1,122 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_short_idct4x4llm_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;************************************************************* -;void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) -;r0 short * input -;r1 short * output -;r2 int pitch -;************************************************************* -;static const int cospi8sqrt2minus1=20091; -;static const int sinpi8sqrt2 =35468; -;static const int rounding = 0; -;Optimization note: The resulting data from dequantization are signed 13-bit values that are -;in the range [-4096, 4095]. This allows using the "vqdmulh" (NEON) instruction, since -;the product won't go out of range (13+16+1=30 bits < 32 bits). This instruction gives the high half -;of the multiplication result that is needed in the IDCT.
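[For reference, a scalar C model of the arithmetic the note above describes, covering one 1-D pass of the transform below; the helper name idct4_1d is illustrative only, not a function in the tree. In the NEON code, vqdmulh.s16 of x by a constant k computes (2*x*k) >> 16, the following vshr.s16 #1 reduces that to (x*k) >> 16, and the "vqadd ... q2" lines add x back in, because both Q16 constants are held reduced by 65536 (that is what the "sinpi8sqrt2 > 65536/2" comments refer to).]

    #include <stdint.h>

    /* Q16 constants from the idct_coeff table in the file below. */
    enum { COSPI8SQRT2MINUS1 = 20091, SINPI8SQRT2 = 35468 };

    /* One 1-D butterfly, using the a1/b1/c1/d1 names from the register
     * comments; the NEON code uses saturating vqadd/vqsub where this
     * sketch uses plain int arithmetic. */
    static void idct4_1d(const int16_t in[4], int16_t out[4])
    {
        int a1 = in[0] + in[2];
        int b1 = in[0] - in[2];

        int c1 = ((in[1] * SINPI8SQRT2) >> 16)
               - (in[3] + ((in[3] * COSPI8SQRT2MINUS1) >> 16));
        int d1 = (in[1] + ((in[1] * COSPI8SQRT2MINUS1) >> 16))
               + ((in[3] * SINPI8SQRT2) >> 16);

        out[0] = (int16_t)(a1 + d1);
        out[1] = (int16_t)(b1 + c1);
        out[2] = (int16_t)(b1 - c1);
        out[3] = (int16_t)(a1 - d1);
    }

[The transform runs this pass once in each direction; after the second pass the vrshr.s16 #3 applies the final rounding shift, i.e. (x + 4) >> 3.]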
- -|vp8_short_idct4x4llm_neon| PROC - adr r12, idct_coeff - vld1.16 {q1, q2}, [r0] - vld1.16 {d0}, [r12] - - vswp d3, d4 ;q2(vp[4] vp[12]) - - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number) - vqadd.s16 q4, q4, q2 - - ;d6 - c1:temp1 - ;d7 - d1:temp2 - ;d8 - d1:temp1 - ;d9 - c1:temp2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - - vswp d3, d4 - - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number) - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vrshr.s16 d2, d2, #3 - vrshr.s16 d3, d3, #3 - vrshr.s16 d4, d4, #3 - vrshr.s16 d5, d5, #3 - - add r3, r1, r2 - add r12, r3, r2 - add r0, r12, r2 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - - vst1.16 {d2}, [r1] - vst1.16 {d3}, [r3] - vst1.16 {d4}, [r12] - vst1.16 {d5}, [r0] - - bx lr - - ENDP - -;----------------- - -idct_coeff - DCD 0x4e7b4e7b, 0x8a8c8a8c - -;20091, 20091, 35468, 35468 - - END diff --git a/vp9/common/arm/neon/vp9_sixtappredict16x16_neon.asm b/vp9/common/arm/neon/vp9_sixtappredict16x16_neon.asm deleted file mode 100644 index 5e83f49f5..000000000 --- a/vp9/common/arm/neon/vp9_sixtappredict16x16_neon.asm +++ /dev/null @@ -1,490 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_sixtap_predict16x16_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -filter16_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; r4 unsigned char *dst_ptr, -; stack(r5) int dst_pitch - -;Note: To take advantage of the 8-bit multiplication instruction in NEON, first apply abs() to -; the filter coeffs to make them u8. Then use vmlsl for the negative coeffs. After multiplication, -; the result can be negative, so it is treated as s16. But it is also possible -; that the result is a large positive number (> 2^15-1), which could be mistaken for a -; negative number. To avoid that error, apply the filter coeffs in the order 0, 1, 4, 5, 2, -; which ensures that the running sum stays in s16 range. Finally, saturating-add the result of -; applying the 3rd filter coeff. The same applies to the other filter functions.
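[The coefficient-ordering note above, made concrete as a short scalar C sketch; kFilter and sixtap_one_pixel are illustrative names, not code from the tree, and the row shown is the xoffset == 2 row of filter16_coeff.]

    #include <stdint.h>

    static const int kFilter[6] = { 2, -11, 108, 36, -8, 1 };

    /* One output pixel of the six-tap filter; src points at the center
     * pixel, so src[-2..3] are the six input samples. Taps are accumulated
     * in the order 0, 1, 4, 5, 2, which keeps the running sum inside s16
     * (the negative taps are the vmlsl.u8 steps, done with abs(coeff) on
     * u8 data). */
    static uint8_t sixtap_one_pixel(const uint8_t *src)
    {
        int sum = src[-2] * kFilter[0]
                + src[-1] * kFilter[1]
                + src[2]  * kFilter[4]
                + src[3]  * kFilter[5]
                + src[0]  * kFilter[2];

        /* Tap 3 is the other large positive coefficient; it is added with
         * s16 saturation (vqadd.s16), then the sum is rounded, shifted by
         * 7 and narrowed with unsigned saturation (vqrshrun.s16 #7). */
        int t = sum + src[1] * kFilter[3];
        if (t > 32767) t = 32767;
        if (t < -32768) t = -32768;
        t = (t + 64) >> 7;
        return (uint8_t)(t < 0 ? 0 : (t > 255 ? 255 : t));
    }

[Accumulating the two large taps together could overflow s16; the chosen order plus a single saturating add avoids that, and this two-stage pattern is what the first-pass and second-pass loops below implement across whole rows at a time.]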
- -|vp8_sixtap_predict16x16_neon| PROC - push {r4-r5, lr} - - adr r12, filter16_coeff - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_filter16x16_only - - add r2, r12, r2, lsl #5 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - - vld1.s32 {q14, q15}, [r2] ;load first_pass filter - - beq firstpass_filter16x16_only - - sub sp, sp, #336 ;reserve space on stack for temporary storage - mov lr, sp - - vabs.s32 q12, q14 - vabs.s32 q13, q15 - - mov r2, #7 ;loop counter - sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) - sub r0, r0, r1, lsl #1 - - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vdup.8 d1, d24[4] - vdup.8 d2, d25[0] - vdup.8 d3, d25[4] - vdup.8 d4, d26[0] - vdup.8 d5, d26[4] - -;First Pass: output_height lines x output_width columns (21x16) -filt_blk2d_fp16x16_loop_neon - vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data - vld1.u8 {d9, d10, d11}, [r0], r1 - vld1.u8 {d12, d13, d14}, [r0], r1 - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q9, d7, d0 - vmull.u8 q10, d9, d0 - vmull.u8 q11, d10, d0 - vmull.u8 q12, d12, d0 - vmull.u8 q13, d13, d0 - - vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d29, d9, d10, #1 - vext.8 d30, d12, d13, #1 - - vmlsl.u8 q8, d28, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q10, d29, d1 - vmlsl.u8 q12, d30, d1 - - vext.8 d28, d7, d8, #1 - vext.8 d29, d10, d11, #1 - vext.8 d30, d13, d14, #1 - - vmlsl.u8 q9, d28, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q11, d29, d1 - vmlsl.u8 q13, d30, d1 - - vext.8 d28, d6, d7, #4 ;construct src_ptr[2] - vext.8 d29, d9, d10, #4 - vext.8 d30, d12, d13, #4 - - vmlsl.u8 q8, d28, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q10, d29, d4 - vmlsl.u8 q12, d30, d4 - - vext.8 d28, d7, d8, #4 - vext.8 d29, d10, d11, #4 - vext.8 d30, d13, d14, #4 - - vmlsl.u8 q9, d28, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q11, d29, d4 - vmlsl.u8 q13, d30, d4 - - vext.8 d28, d6, d7, #5 ;construct src_ptr[3] - vext.8 d29, d9, d10, #5 - vext.8 d30, d12, d13, #5 - - vmlal.u8 q8, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q10, d29, d5 - vmlal.u8 q12, d30, d5 - - vext.8 d28, d7, d8, #5 - vext.8 d29, d10, d11, #5 - vext.8 d30, d13, d14, #5 - - vmlal.u8 q9, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q11, d29, d5 - vmlal.u8 q13, d30, d5 - - vext.8 d28, d6, d7, #2 ;construct src_ptr[0] - vext.8 d29, d9, d10, #2 - vext.8 d30, d12, d13, #2 - - vmlal.u8 q8, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q10, d29, d2 - vmlal.u8 q12, d30, d2 - - vext.8 d28, d7, d8, #2 - vext.8 d29, d10, d11, #2 - vext.8 d30, d13, d14, #2 - - vmlal.u8 q9, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q11, d29, d2 - vmlal.u8 q13, d30, d2 - - vext.8 d28, d6, d7, #3 ;construct src_ptr[1] - vext.8 d29, d9, d10, #3 - vext.8 d30, d12, d13, #3 - - vext.8 d15, d7, d8, #3 - vext.8 d31, d10, d11, #3 - vext.8 d6, d13, d14, #3 - - vmull.u8 q4, d28, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q5, d29, d3 - vmull.u8 q6, d30, d3 - - vqadd.s16 q8, q4 ;sum of all (src_data*filter_parameters) - vqadd.s16 q10, q5 - vqadd.s16 q12, q6 - - vmull.u8 q6, d15, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q7, d31, d3 - vmull.u8 q3, d6, d3 - - subs r2, r2, #1 - - vqadd.s16 q9, q6 - vqadd.s16 q11, q7 - vqadd.s16 q13, q3 - - vqrshrun.s16 d6, q8, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q9, #7 - vqrshrun.s16 d8, q10, #7 - vqrshrun.s16 d9, q11, 
#7 - vqrshrun.s16 d10, q12, #7 - vqrshrun.s16 d11, q13, #7 - - vst1.u8 {d6, d7, d8}, [lr]! ;store result - vst1.u8 {d9, d10, d11}, [lr]! - - bne filt_blk2d_fp16x16_loop_neon - -;Second pass: 16x16 -;secondpass_filter - do first 8-columns and then second 8-columns - add r3, r12, r3, lsl #5 - sub lr, lr, #336 - - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - mov r3, #2 ;loop counter - - vabs.s32 q7, q5 - vabs.s32 q8, q6 - - mov r2, #16 - - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vdup.8 d1, d14[4] - vdup.8 d2, d15[0] - vdup.8 d3, d15[4] - vdup.8 d4, d16[0] - vdup.8 d5, d16[4] - -filt_blk2d_sp16x16_outloop_neon - vld1.u8 {d18}, [lr], r2 ;load src data - vld1.u8 {d19}, [lr], r2 - vld1.u8 {d20}, [lr], r2 - vld1.u8 {d21}, [lr], r2 - mov r12, #4 ;loop counter - vld1.u8 {d22}, [lr], r2 - -secondpass_inner_loop_neon - vld1.u8 {d23}, [lr], r2 ;load src data - vld1.u8 {d24}, [lr], r2 - vld1.u8 {d25}, [lr], r2 - vld1.u8 {d26}, [lr], r2 - - vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d19, d0 - vmull.u8 q5, d20, d0 - vmull.u8 q6, d21, d0 - - vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q4, d20, d1 - vmlsl.u8 q5, d21, d1 - vmlsl.u8 q6, d22, d1 - - vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d23, d4 - vmlsl.u8 q5, d24, d4 - vmlsl.u8 q6, d25, d4 - - vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d21, d2 - vmlal.u8 q5, d22, d2 - vmlal.u8 q6, d23, d2 - - vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q4, d24, d5 - vmlal.u8 q5, d25, d5 - vmlal.u8 q6, d26, d5 - - vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q8, d22, d3 - vmull.u8 q9, d23, d3 - vmull.u8 q10, d24, d3 - - subs r12, r12, #1 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q8, #7 - vqrshrun.s16 d8, q9, #7 - vqrshrun.s16 d9, q10, #7 - - vst1.u8 {d6}, [r4], r5 ;store result - vmov q9, q11 - vst1.u8 {d7}, [r4], r5 - vmov q10, q12 - vst1.u8 {d8}, [r4], r5 - vmov d22, d26 - vst1.u8 {d9}, [r4], r5 - - bne secondpass_inner_loop_neon - - subs r3, r3, #1 - sub lr, lr, #336 - add lr, lr, #8 - - sub r4, r4, r5, lsl #4 - add r4, r4, #8 - - bne filt_blk2d_sp16x16_outloop_neon - - add sp, sp, #336 - pop {r4-r5,pc} - -;-------------------- -firstpass_filter16x16_only - vabs.s32 q12, q14 - vabs.s32 q13, q15 - - mov r2, #8 ;loop counter - sub r0, r0, #2 ;move srcptr back to (column-2) - - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vdup.8 d1, d24[4] - vdup.8 d2, d25[0] - vdup.8 d3, d25[4] - vdup.8 d4, d26[0] - vdup.8 d5, d26[4] - -;First Pass: output_height lines x output_width columns (16x16) -filt_blk2d_fpo16x16_loop_neon - vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data - vld1.u8 {d9, d10, d11}, [r0], r1 - - pld [r0] - pld [r0, r1] - - vmull.u8 q6, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q7, d7, d0 - vmull.u8 q8, d9, d0 - vmull.u8 q9, d10, d0 - - vext.8 d20, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d21, d9, d10, #1 - vext.8 d22, d7, d8, #1 - vext.8 d23, d10, d11, #1 - vext.8 d24, d6, d7, #4 ;construct src_ptr[2] - vext.8 d25, d9, d10, #4 - vext.8 d26, d7, d8, #4 - vext.8 d27, d10, d11, #4 - vext.8 d28, d6, d7, #5 ;construct src_ptr[3] - vext.8 d29, d9, d10, #5 - - vmlsl.u8 q6, d20, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d21, d1 - vmlsl.u8 q7, d22, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q9, d23, d1 - vmlsl.u8 q6, d24, d4 ;-(src_ptr[2] * vp9_filter[4]) - 
vmlsl.u8 q8, d25, d4 - vmlsl.u8 q7, d26, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q9, d27, d4 - vmlal.u8 q6, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q8, d29, d5 - - vext.8 d20, d7, d8, #5 - vext.8 d21, d10, d11, #5 - vext.8 d22, d6, d7, #2 ;construct src_ptr[0] - vext.8 d23, d9, d10, #2 - vext.8 d24, d7, d8, #2 - vext.8 d25, d10, d11, #2 - - vext.8 d26, d6, d7, #3 ;construct src_ptr[1] - vext.8 d27, d9, d10, #3 - vext.8 d28, d7, d8, #3 - vext.8 d29, d10, d11, #3 - - vmlal.u8 q7, d20, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q9, d21, d5 - vmlal.u8 q6, d22, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d23, d2 - vmlal.u8 q7, d24, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q9, d25, d2 - - vmull.u8 q10, d26, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q11, d27, d3 - vmull.u8 q12, d28, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q15, d29, d3 - - vqadd.s16 q6, q10 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q11 - vqadd.s16 q7, q12 - vqadd.s16 q9, q15 - - subs r2, r2, #1 - - vqrshrun.s16 d6, q6, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q7, #7 - vqrshrun.s16 d8, q8, #7 - vqrshrun.s16 d9, q9, #7 - - vst1.u8 {q3}, [r4], r5 ;store result - vst1.u8 {q4}, [r4], r5 - - bne filt_blk2d_fpo16x16_loop_neon - - pop {r4-r5,pc} - -;-------------------- -secondpass_filter16x16_only -;Second pass: 16x16 - add r3, r12, r3, lsl #5 - sub r0, r0, r1, lsl #1 - - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - mov r3, #2 ;loop counter - - vabs.s32 q7, q5 - vabs.s32 q8, q6 - - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vdup.8 d1, d14[4] - vdup.8 d2, d15[0] - vdup.8 d3, d15[4] - vdup.8 d4, d16[0] - vdup.8 d5, d16[4] - -filt_blk2d_spo16x16_outloop_neon - vld1.u8 {d18}, [r0], r1 ;load src data - vld1.u8 {d19}, [r0], r1 - vld1.u8 {d20}, [r0], r1 - vld1.u8 {d21}, [r0], r1 - mov r12, #4 ;loop counter - vld1.u8 {d22}, [r0], r1 - -secondpass_only_inner_loop_neon - vld1.u8 {d23}, [r0], r1 ;load src data - vld1.u8 {d24}, [r0], r1 - vld1.u8 {d25}, [r0], r1 - vld1.u8 {d26}, [r0], r1 - - vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d19, d0 - vmull.u8 q5, d20, d0 - vmull.u8 q6, d21, d0 - - vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q4, d20, d1 - vmlsl.u8 q5, d21, d1 - vmlsl.u8 q6, d22, d1 - - vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d23, d4 - vmlsl.u8 q5, d24, d4 - vmlsl.u8 q6, d25, d4 - - vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d21, d2 - vmlal.u8 q5, d22, d2 - vmlal.u8 q6, d23, d2 - - vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q4, d24, d5 - vmlal.u8 q5, d25, d5 - vmlal.u8 q6, d26, d5 - - vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q8, d22, d3 - vmull.u8 q9, d23, d3 - vmull.u8 q10, d24, d3 - - subs r12, r12, #1 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q8, #7 - vqrshrun.s16 d8, q9, #7 - vqrshrun.s16 d9, q10, #7 - - vst1.u8 {d6}, [r4], r5 ;store result - vmov q9, q11 - vst1.u8 {d7}, [r4], r5 - vmov q10, q12 - vst1.u8 {d8}, [r4], r5 - vmov d22, d26 - vst1.u8 {d9}, [r4], r5 - - bne secondpass_only_inner_loop_neon - - subs r3, r3, #1 - sub r0, r0, r1, lsl #4 - sub r0, r0, r1, lsl #2 - sub r0, r0, r1 - add r0, r0, #8 - - sub r4, r4, r5, lsl #4 - add r4, r4, #8 - - bne filt_blk2d_spo16x16_outloop_neon - - pop {r4-r5,pc} - - ENDP - -;----------------- - END diff --git 
a/vp9/common/arm/neon/vp9_sixtappredict4x4_neon.asm b/vp9/common/arm/neon/vp9_sixtappredict4x4_neon.asm deleted file mode 100644 index 5966b642f..000000000 --- a/vp9/common/arm/neon/vp9_sixtappredict4x4_neon.asm +++ /dev/null @@ -1,422 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_sixtap_predict_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -filter4_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack(r4) unsigned char *dst_ptr, -; stack(lr) int dst_pitch - -|vp8_sixtap_predict_neon| PROC - push {r4, lr} - - adr r12, filter4_coeff - ldr r4, [sp, #8] ;load parameters from stack - ldr lr, [sp, #12] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_filter4x4_only - - add r2, r12, r2, lsl #5 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - vld1.s32 {q14, q15}, [r2] ;load first_pass filter - - beq firstpass_filter4x4_only - - vabs.s32 q12, q14 ;get abs(filer_parameters) - vabs.s32 q13, q15 - - sub r0, r0, #2 ;go back 2 columns of src data - sub r0, r0, r1, lsl #1 ;go back 2 lines of src data - -;First pass: output_height lines x output_width columns (9x4) - vld1.u8 {q3}, [r0], r1 ;load first 4-line src data - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vld1.u8 {q4}, [r0], r1 - vdup.8 d1, d24[4] - vld1.u8 {q5}, [r0], r1 - vdup.8 d2, d25[0] - vld1.u8 {q6}, [r0], r1 - vdup.8 d3, d25[4] - vdup.8 d4, d26[0] - vdup.8 d5, d26[4] - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vext.8 d18, d6, d7, #5 ;construct src_ptr[3] - vext.8 d19, d8, d9, #5 - vext.8 d20, d10, d11, #5 - vext.8 d21, d12, d13, #5 - - vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done - vswp d11, d12 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3]) - vzip.32 d20, d21 - vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp9_filter[5]) - vmull.u8 q8, d20, d5 - - vmov q4, q3 ;keep original src data in q4 q6 - vmov q6, q5 - - vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together - vzip.32 d10, d11 - vshr.u64 q9, q4, #8 ;construct src_ptr[-1] - vshr.u64 q10, q6, #8 - vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp9_filter[0]) - vmlal.u8 q8, d10, d0 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1]) - vzip.32 d20, d21 - vshr.u64 q3, q4, #32 ;construct src_ptr[2] - vshr.u64 q5, q6, #32 - vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d20, d1 - - vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2]) - vzip.32 d10, d11 - vshr.u64 q9, q4, #16 ;construct src_ptr[0] - vshr.u64 q10, q6, #16 - vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d10, d4 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0]) - vzip.32 d20, d21 - vshr.u64 q3, q4, #24 ;construct src_ptr[1] - vshr.u64 q5, q6, #24 - vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp9_filter[2]) 
- vmlal.u8 q8, d20, d2 - - vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1]) - vzip.32 d10, d11 - vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q10, d10, d3 - - vld1.u8 {q3}, [r0], r1 ;load rest 5-line src data - vld1.u8 {q4}, [r0], r1 - - vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q10 - - vld1.u8 {q5}, [r0], r1 - vld1.u8 {q6}, [r0], r1 - - vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d28, q8, #7 - - ;First Pass on rest 5-line data - vld1.u8 {q11}, [r0], r1 - - vext.8 d18, d6, d7, #5 ;construct src_ptr[3] - vext.8 d19, d8, d9, #5 - vext.8 d20, d10, d11, #5 - vext.8 d21, d12, d13, #5 - - vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done - vswp d11, d12 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3]) - vzip.32 d20, d21 - vext.8 d31, d22, d23, #5 ;construct src_ptr[3] - vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp9_filter[5]) - vmull.u8 q8, d20, d5 - vmull.u8 q12, d31, d5 ;(src_ptr[3] * vp9_filter[5]) - - vmov q4, q3 ;keep original src data in q4 q6 - vmov q6, q5 - - vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together - vzip.32 d10, d11 - vshr.u64 q9, q4, #8 ;construct src_ptr[-1] - vshr.u64 q10, q6, #8 - - vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp9_filter[0]) - vmlal.u8 q8, d10, d0 - vmlal.u8 q12, d22, d0 ;(src_ptr[-2] * vp9_filter[0]) - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1]) - vzip.32 d20, d21 - vshr.u64 q3, q4, #32 ;construct src_ptr[2] - vshr.u64 q5, q6, #32 - vext.8 d31, d22, d23, #1 ;construct src_ptr[-1] - - vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d20, d1 - vmlsl.u8 q12, d31, d1 ;-(src_ptr[-1] * vp9_filter[1]) - - vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2]) - vzip.32 d10, d11 - vshr.u64 q9, q4, #16 ;construct src_ptr[0] - vshr.u64 q10, q6, #16 - vext.8 d31, d22, d23, #4 ;construct src_ptr[2] - - vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d10, d4 - vmlsl.u8 q12, d31, d4 ;-(src_ptr[2] * vp9_filter[4]) - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0]) - vzip.32 d20, d21 - vshr.u64 q3, q4, #24 ;construct src_ptr[1] - vshr.u64 q5, q6, #24 - vext.8 d31, d22, d23, #2 ;construct src_ptr[0] - - vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d20, d2 - vmlal.u8 q12, d31, d2 ;(src_ptr[0] * vp9_filter[2]) - - vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1]) - vzip.32 d10, d11 - vext.8 d31, d22, d23, #3 ;construct src_ptr[1] - vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q10, d10, d3 - vmull.u8 q11, d31, d3 ;(src_ptr[1] * vp9_filter[3]) - - add r3, r12, r3, lsl #5 - - vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q10 - vqadd.s16 q12, q11 - - vext.8 d23, d27, d28, #4 - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - - vqrshrun.s16 d29, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d30, q8, #7 - vqrshrun.s16 d31, q12, #7 - -;Second pass: 4x4 - vabs.s32 q7, q5 - vabs.s32 q8, q6 - - vext.8 d24, d28, d29, #4 - vext.8 d25, d29, d30, #4 - vext.8 d26, d30, d31, #4 - - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vdup.8 d1, d14[4] - vdup.8 d2, d15[0] - vdup.8 d3, d15[4] - vdup.8 d4, d16[0] - vdup.8 d5, d16[4] - - vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d28, d0 - - vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp9_filter[5]) - vmull.u8 q6, d26, d5 - - vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d30, d4 - - vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp9_filter[1]) - 
vmlsl.u8 q6, d24, d1 - - vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d29, d2 - - vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp9_filter[3]) - vmlal.u8 q6, d25, d3 - - add r0, r4, lr - add r1, r0, lr - add r2, r1, lr - - vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q6, q4 - - vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d4, q6, #7 - - vst1.32 {d3[0]}, [r4] ;store result - vst1.32 {d3[1]}, [r0] - vst1.32 {d4[0]}, [r1] - vst1.32 {d4[1]}, [r2] - - pop {r4, pc} - - -;--------------------- -firstpass_filter4x4_only - vabs.s32 q12, q14 ;get abs(filer_parameters) - vabs.s32 q13, q15 - - sub r0, r0, #2 ;go back 2 columns of src data - -;First pass: output_height lines x output_width columns (4x4) - vld1.u8 {q3}, [r0], r1 ;load first 4-line src data - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vld1.u8 {q4}, [r0], r1 - vdup.8 d1, d24[4] - vld1.u8 {q5}, [r0], r1 - vdup.8 d2, d25[0] - vld1.u8 {q6}, [r0], r1 - - vdup.8 d3, d25[4] - vdup.8 d4, d26[0] - vdup.8 d5, d26[4] - - vext.8 d18, d6, d7, #5 ;construct src_ptr[3] - vext.8 d19, d8, d9, #5 - vext.8 d20, d10, d11, #5 - vext.8 d21, d12, d13, #5 - - vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done - vswp d11, d12 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3]) - vzip.32 d20, d21 - vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp9_filter[5]) - vmull.u8 q8, d20, d5 - - vmov q4, q3 ;keep original src data in q4 q6 - vmov q6, q5 - - vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together - vzip.32 d10, d11 - vshr.u64 q9, q4, #8 ;construct src_ptr[-1] - vshr.u64 q10, q6, #8 - vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp9_filter[0]) - vmlal.u8 q8, d10, d0 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1]) - vzip.32 d20, d21 - vshr.u64 q3, q4, #32 ;construct src_ptr[2] - vshr.u64 q5, q6, #32 - vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d20, d1 - - vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2]) - vzip.32 d10, d11 - vshr.u64 q9, q4, #16 ;construct src_ptr[0] - vshr.u64 q10, q6, #16 - vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d10, d4 - - vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0]) - vzip.32 d20, d21 - vshr.u64 q3, q4, #24 ;construct src_ptr[1] - vshr.u64 q5, q6, #24 - vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d20, d2 - - vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1]) - vzip.32 d10, d11 - vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q10, d10, d3 - - add r0, r4, lr - add r1, r0, lr - add r2, r1, lr - - vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q10 - - vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d28, q8, #7 - - vst1.32 {d27[0]}, [r4] ;store result - vst1.32 {d27[1]}, [r0] - vst1.32 {d28[0]}, [r1] - vst1.32 {d28[1]}, [r2] - - pop {r4, pc} - - -;--------------------- -secondpass_filter4x4_only - sub r0, r0, r1, lsl #1 - add r3, r12, r3, lsl #5 - - vld1.32 {d27[0]}, [r0], r1 ;load src data - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - vld1.32 {d27[1]}, [r0], r1 - vabs.s32 q7, q5 - vld1.32 {d28[0]}, [r0], r1 - vabs.s32 q8, q6 - vld1.32 {d28[1]}, [r0], r1 - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vld1.32 {d29[0]}, [r0], r1 - vdup.8 d1, d14[4] - vld1.32 {d29[1]}, [r0], r1 - vdup.8 d2, d15[0] - vld1.32 {d30[0]}, [r0], r1 - vdup.8 d3, d15[4] - vld1.32 {d30[1]}, [r0], r1 - vdup.8 d4, d16[0] - vld1.32 {d31[0]}, [r0], r1 - vdup.8 d5, d16[4] - - vext.8 d23, d27, d28, 
#4 - vext.8 d24, d28, d29, #4 - vext.8 d25, d29, d30, #4 - vext.8 d26, d30, d31, #4 - - vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d28, d0 - - vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp9_filter[5]) - vmull.u8 q6, d26, d5 - - vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d30, d4 - - vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q6, d24, d1 - - vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d29, d2 - - vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp9_filter[3]) - vmlal.u8 q6, d25, d3 - - add r0, r4, lr - add r1, r0, lr - add r2, r1, lr - - vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q6, q4 - - vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d4, q6, #7 - - vst1.32 {d3[0]}, [r4] ;store result - vst1.32 {d3[1]}, [r0] - vst1.32 {d4[0]}, [r1] - vst1.32 {d4[1]}, [r2] - - pop {r4, pc} - - ENDP - -;----------------- - - END diff --git a/vp9/common/arm/neon/vp9_sixtappredict8x4_neon.asm b/vp9/common/arm/neon/vp9_sixtappredict8x4_neon.asm deleted file mode 100644 index 9ce1e3bbd..000000000 --- a/vp9/common/arm/neon/vp9_sixtappredict8x4_neon.asm +++ /dev/null @@ -1,473 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_sixtap_predict8x4_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -filter8_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; r4 unsigned char *dst_ptr, -; stack(r5) int dst_pitch - -|vp8_sixtap_predict8x4_neon| PROC - push {r4-r5, lr} - - adr r12, filter8_coeff - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_filter8x4_only - - add r2, r12, r2, lsl #5 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - - vld1.s32 {q14, q15}, [r2] ;load first_pass filter - - beq firstpass_filter8x4_only - - sub sp, sp, #32 ;reserve space on stack for temporary storage - vabs.s32 q12, q14 - vabs.s32 q13, q15 - - sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) - mov lr, sp - sub r0, r0, r1, lsl #1 - - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vdup.8 d1, d24[4] - vdup.8 d2, d25[0] - -;First pass: output_height lines x output_width columns (9x8) - vld1.u8 {q3}, [r0], r1 ;load src data - vdup.8 d3, d25[4] - vld1.u8 {q4}, [r0], r1 - vdup.8 d4, d26[0] - vld1.u8 {q5}, [r0], r1 - vdup.8 d5, d26[4] - vld1.u8 {q6}, [r0], r1 - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q8, d8, d0 - vmull.u8 q9, d10, d0 - vmull.u8 q10, d12, d0 - - vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d29, d8, d9, #1 - vext.8 d30, d10, d11, #1 - vext.8 d31, d12, d13, #1 - - vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d29, d1 - vmlsl.u8 q9, 
d30, d1 - vmlsl.u8 q10, d31, d1 - - vext.8 d28, d6, d7, #4 ;construct src_ptr[2] - vext.8 d29, d8, d9, #4 - vext.8 d30, d10, d11, #4 - vext.8 d31, d12, d13, #4 - - vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d29, d4 - vmlsl.u8 q9, d30, d4 - vmlsl.u8 q10, d31, d4 - - vext.8 d28, d6, d7, #2 ;construct src_ptr[0] - vext.8 d29, d8, d9, #2 - vext.8 d30, d10, d11, #2 - vext.8 d31, d12, d13, #2 - - vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d29, d2 - vmlal.u8 q9, d30, d2 - vmlal.u8 q10, d31, d2 - - vext.8 d28, d6, d7, #5 ;construct src_ptr[3] - vext.8 d29, d8, d9, #5 - vext.8 d30, d10, d11, #5 - vext.8 d31, d12, d13, #5 - - vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q8, d29, d5 - vmlal.u8 q9, d30, d5 - vmlal.u8 q10, d31, d5 - - vext.8 d28, d6, d7, #3 ;construct src_ptr[1] - vext.8 d29, d8, d9, #3 - vext.8 d30, d10, d11, #3 - vext.8 d31, d12, d13, #3 - - vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q4, d29, d3 - vmull.u8 q5, d30, d3 - vmull.u8 q6, d31, d3 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vld1.u8 {q3}, [r0], r1 ;load src data - - vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d23, q8, #7 - vqrshrun.s16 d24, q9, #7 - vqrshrun.s16 d25, q10, #7 - - vld1.u8 {q4}, [r0], r1 - vst1.u8 {d22}, [lr]! ;store result - vld1.u8 {q5}, [r0], r1 - vst1.u8 {d23}, [lr]! - vld1.u8 {q6}, [r0], r1 - vst1.u8 {d24}, [lr]! - vld1.u8 {q7}, [r0], r1 - vst1.u8 {d25}, [lr]! - - ;first_pass filtering on the rest 5-line data - vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q9, d8, d0 - vmull.u8 q10, d10, d0 - vmull.u8 q11, d12, d0 - vmull.u8 q12, d14, d0 - - vext.8 d27, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d28, d8, d9, #1 - vext.8 d29, d10, d11, #1 - vext.8 d30, d12, d13, #1 - vext.8 d31, d14, d15, #1 - - vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q9, d28, d1 - vmlsl.u8 q10, d29, d1 - vmlsl.u8 q11, d30, d1 - vmlsl.u8 q12, d31, d1 - - vext.8 d27, d6, d7, #4 ;construct src_ptr[2] - vext.8 d28, d8, d9, #4 - vext.8 d29, d10, d11, #4 - vext.8 d30, d12, d13, #4 - vext.8 d31, d14, d15, #4 - - vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q9, d28, d4 - vmlsl.u8 q10, d29, d4 - vmlsl.u8 q11, d30, d4 - vmlsl.u8 q12, d31, d4 - - vext.8 d27, d6, d7, #2 ;construct src_ptr[0] - vext.8 d28, d8, d9, #2 - vext.8 d29, d10, d11, #2 - vext.8 d30, d12, d13, #2 - vext.8 d31, d14, d15, #2 - - vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q9, d28, d2 - vmlal.u8 q10, d29, d2 - vmlal.u8 q11, d30, d2 - vmlal.u8 q12, d31, d2 - - vext.8 d27, d6, d7, #5 ;construct src_ptr[3] - vext.8 d28, d8, d9, #5 - vext.8 d29, d10, d11, #5 - vext.8 d30, d12, d13, #5 - vext.8 d31, d14, d15, #5 - - vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q9, d28, d5 - vmlal.u8 q10, d29, d5 - vmlal.u8 q11, d30, d5 - vmlal.u8 q12, d31, d5 - - vext.8 d27, d6, d7, #3 ;construct src_ptr[1] - vext.8 d28, d8, d9, #3 - vext.8 d29, d10, d11, #3 - vext.8 d30, d12, d13, #3 - vext.8 d31, d14, d15, #3 - - vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q4, d28, d3 - vmull.u8 q5, d29, d3 - vmull.u8 q6, d30, d3 - vmull.u8 q7, d31, d3 - - vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q9, q4 - vqadd.s16 q10, q5 - vqadd.s16 q11, q6 - vqadd.s16 q12, q7 - - vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d27, q9, #7 - vqrshrun.s16 d28, q10, #7 - vqrshrun.s16 d29, q11, 
#7 ;load intermediate data from stack - vqrshrun.s16 d30, q12, #7 - -;Second pass: 8x4 -;secondpass_filter - add r3, r12, r3, lsl #5 - sub lr, lr, #32 - - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - vld1.u8 {q11}, [lr]! - - vabs.s32 q7, q5 - vabs.s32 q8, q6 - - vld1.u8 {q12}, [lr]! - - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vdup.8 d1, d14[4] - vdup.8 d2, d15[0] - vdup.8 d3, d15[4] - vdup.8 d4, d16[0] - vdup.8 d5, d16[4] - - vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d23, d0 - vmull.u8 q5, d24, d0 - vmull.u8 q6, d25, d0 - - vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q4, d24, d1 - vmlsl.u8 q5, d25, d1 - vmlsl.u8 q6, d26, d1 - - vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d27, d4 - vmlsl.u8 q5, d28, d4 - vmlsl.u8 q6, d29, d4 - - vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d25, d2 - vmlal.u8 q5, d26, d2 - vmlal.u8 q6, d27, d2 - - vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q4, d28, d5 - vmlal.u8 q5, d29, d5 - vmlal.u8 q6, d30, d5 - - vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q8, d26, d3 - vmull.u8 q9, d27, d3 - vmull.u8 q10, d28, d3 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q8, #7 - vqrshrun.s16 d8, q9, #7 - vqrshrun.s16 d9, q10, #7 - - vst1.u8 {d6}, [r4], r5 ;store result - vst1.u8 {d7}, [r4], r5 - vst1.u8 {d8}, [r4], r5 - vst1.u8 {d9}, [r4], r5 - - add sp, sp, #32 - pop {r4-r5,pc} - -;-------------------- -firstpass_filter8x4_only - vabs.s32 q12, q14 - vabs.s32 q13, q15 - - sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) - vld1.u8 {q3}, [r0], r1 ;load src data - - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vld1.u8 {q4}, [r0], r1 - vdup.8 d1, d24[4] - vld1.u8 {q5}, [r0], r1 - vdup.8 d2, d25[0] - vld1.u8 {q6}, [r0], r1 - vdup.8 d3, d25[4] - vdup.8 d4, d26[0] - vdup.8 d5, d26[4] - -;First pass: output_height lines x output_width columns (4x8) - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q8, d8, d0 - vmull.u8 q9, d10, d0 - vmull.u8 q10, d12, d0 - - vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d29, d8, d9, #1 - vext.8 d30, d10, d11, #1 - vext.8 d31, d12, d13, #1 - - vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d29, d1 - vmlsl.u8 q9, d30, d1 - vmlsl.u8 q10, d31, d1 - - vext.8 d28, d6, d7, #4 ;construct src_ptr[2] - vext.8 d29, d8, d9, #4 - vext.8 d30, d10, d11, #4 - vext.8 d31, d12, d13, #4 - - vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d29, d4 - vmlsl.u8 q9, d30, d4 - vmlsl.u8 q10, d31, d4 - - vext.8 d28, d6, d7, #2 ;construct src_ptr[0] - vext.8 d29, d8, d9, #2 - vext.8 d30, d10, d11, #2 - vext.8 d31, d12, d13, #2 - - vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d29, d2 - vmlal.u8 q9, d30, d2 - vmlal.u8 q10, d31, d2 - - vext.8 d28, d6, d7, #5 ;construct src_ptr[3] - vext.8 d29, d8, d9, #5 - vext.8 d30, d10, d11, #5 - vext.8 d31, d12, d13, #5 - - vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q8, d29, d5 - vmlal.u8 q9, d30, d5 - vmlal.u8 q10, d31, d5 - - vext.8 d28, d6, d7, #3 ;construct src_ptr[1] - vext.8 d29, d8, d9, #3 - vext.8 d30, d10, d11, #3 - vext.8 d31, d12, d13, #3 - - vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q4, d29, d3 - vmull.u8 q5, d30, d3 - vmull.u8 q6, d31, d3 - - vqadd.s16 q7, q3 
;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d23, q8, #7 - vqrshrun.s16 d24, q9, #7 - vqrshrun.s16 d25, q10, #7 - - vst1.u8 {d22}, [r4], r5 ;store result - vst1.u8 {d23}, [r4], r5 - vst1.u8 {d24}, [r4], r5 - vst1.u8 {d25}, [r4], r5 - - pop {r4-r5,pc} - -;--------------------- -secondpass_filter8x4_only -;Second pass: 8x4 - add r3, r12, r3, lsl #5 - sub r0, r0, r1, lsl #1 - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - vabs.s32 q7, q5 - vabs.s32 q8, q6 - - vld1.u8 {d22}, [r0], r1 - vld1.u8 {d23}, [r0], r1 - vld1.u8 {d24}, [r0], r1 - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vld1.u8 {d25}, [r0], r1 - vdup.8 d1, d14[4] - vld1.u8 {d26}, [r0], r1 - vdup.8 d2, d15[0] - vld1.u8 {d27}, [r0], r1 - vdup.8 d3, d15[4] - vld1.u8 {d28}, [r0], r1 - vdup.8 d4, d16[0] - vld1.u8 {d29}, [r0], r1 - vdup.8 d5, d16[4] - vld1.u8 {d30}, [r0], r1 - - vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d23, d0 - vmull.u8 q5, d24, d0 - vmull.u8 q6, d25, d0 - - vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q4, d24, d1 - vmlsl.u8 q5, d25, d1 - vmlsl.u8 q6, d26, d1 - - vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d27, d4 - vmlsl.u8 q5, d28, d4 - vmlsl.u8 q6, d29, d4 - - vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d25, d2 - vmlal.u8 q5, d26, d2 - vmlal.u8 q6, d27, d2 - - vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q4, d28, d5 - vmlal.u8 q5, d29, d5 - vmlal.u8 q6, d30, d5 - - vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q8, d26, d3 - vmull.u8 q9, d27, d3 - vmull.u8 q10, d28, d3 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q8, #7 - vqrshrun.s16 d8, q9, #7 - vqrshrun.s16 d9, q10, #7 - - vst1.u8 {d6}, [r4], r5 ;store result - vst1.u8 {d7}, [r4], r5 - vst1.u8 {d8}, [r4], r5 - vst1.u8 {d9}, [r4], r5 - - pop {r4-r5,pc} - - ENDP - -;----------------- - - END diff --git a/vp9/common/arm/neon/vp9_sixtappredict8x8_neon.asm b/vp9/common/arm/neon/vp9_sixtappredict8x8_neon.asm deleted file mode 100644 index 5ff16616d..000000000 --- a/vp9/common/arm/neon/vp9_sixtappredict8x8_neon.asm +++ /dev/null @@ -1,524 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
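The 4x4, 8x4 and 8x8 predictors being deleted here all share one scheme: when xoffset and yoffset are both nonzero, a first pass filters (height + 5) source rows horizontally, narrows each result to bytes with vqrshrun.s16 #7, parks the intermediate rows on the stack, and a second pass runs the six-tap filter vertically over that buffer. A scalar sketch of the scheme, assuming the coefficients laid out in filter8_coeff; the helper names and fixed temp size are illustrative, not libvpx code:

    /* Scalar twin of vqrshrun.s16 #7: round, shift by 7, saturate to u8. */
    static unsigned char round_shift_sat(int x) {
      x = (x + 64) >> 7;
      return (unsigned char)(x < 0 ? 0 : (x > 255 ? 255 : x));
    }

    /* Two-pass six-tap prediction, scalar reference (illustrative). */
    static void sixtap_2d_ref(const unsigned char *src, int src_stride,
                              unsigned char *dst, int dst_stride,
                              const short *hf, const short *vf, /* 6 taps */
                              int w, int h) {
      unsigned char tmp[21 * 16];   /* up to (16 + 5) x 16 intermediates */
      int r, c, t;

      /* First pass: start 2 rows above and 2 columns left of the block. */
      src -= 2 * src_stride + 2;
      for (r = 0; r < h + 5; ++r, src += src_stride)
        for (c = 0; c < w; ++c) {
          int sum = 0;
          for (t = 0; t < 6; ++t) sum += src[c + t] * hf[t];
          tmp[r * w + c] = round_shift_sat(sum);
        }

      /* Second pass: six taps vertically over the byte intermediates. */
      for (r = 0; r < h; ++r, dst += dst_stride)
        for (c = 0; c < w; ++c) {
          int sum = 0;
          for (t = 0; t < 6; ++t) sum += tmp[(r + t) * w + c] * vf[t];
          dst[c] = round_shift_sat(sum);
        }
    }

Narrowing to bytes between the passes mirrors the asm, which applies vqrshrun to the first-pass results before storing them, trading a little precision for halved intermediate traffic.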
-; - - - EXPORT |vp8_sixtap_predict8x8_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -filter8_coeff - DCD 0, 0, 128, 0, 0, 0, 0, 0 - DCD 0, -6, 123, 12, -1, 0, 0, 0 - DCD 2, -11, 108, 36, -8, 1, 0, 0 - DCD 0, -9, 93, 50, -6, 0, 0, 0 - DCD 3, -16, 77, 77, -16, 3, 0, 0 - DCD 0, -6, 50, 93, -9, 0, 0, 0 - DCD 1, -8, 36, 108, -11, 2, 0, 0 - DCD 0, -1, 12, 123, -6, 0, 0, 0 - -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack(r4) unsigned char *dst_ptr, -; stack(r5) int dst_pitch - -|vp8_sixtap_predict8x8_neon| PROC - push {r4-r5, lr} - - adr r12, filter8_coeff - - ldr r4, [sp, #12] ;load parameters from stack - ldr r5, [sp, #16] ;load parameters from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_filter8x8_only - - add r2, r12, r2, lsl #5 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - - vld1.s32 {q14, q15}, [r2] ;load first_pass filter - - beq firstpass_filter8x8_only - - sub sp, sp, #64 ;reserve space on stack for temporary storage - mov lr, sp - - vabs.s32 q12, q14 - vabs.s32 q13, q15 - - mov r2, #2 ;loop counter - sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) - sub r0, r0, r1, lsl #1 - - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vdup.8 d1, d24[4] - vdup.8 d2, d25[0] - -;First pass: output_height lines x output_width columns (13x8) - vld1.u8 {q3}, [r0], r1 ;load src data - vdup.8 d3, d25[4] - vld1.u8 {q4}, [r0], r1 - vdup.8 d4, d26[0] - vld1.u8 {q5}, [r0], r1 - vdup.8 d5, d26[4] - vld1.u8 {q6}, [r0], r1 - -filt_blk2d_fp8x8_loop_neon - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q8, d8, d0 - vmull.u8 q9, d10, d0 - vmull.u8 q10, d12, d0 - - vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d29, d8, d9, #1 - vext.8 d30, d10, d11, #1 - vext.8 d31, d12, d13, #1 - - vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d29, d1 - vmlsl.u8 q9, d30, d1 - vmlsl.u8 q10, d31, d1 - - vext.8 d28, d6, d7, #4 ;construct src_ptr[2] - vext.8 d29, d8, d9, #4 - vext.8 d30, d10, d11, #4 - vext.8 d31, d12, d13, #4 - - vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d29, d4 - vmlsl.u8 q9, d30, d4 - vmlsl.u8 q10, d31, d4 - - vext.8 d28, d6, d7, #2 ;construct src_ptr[0] - vext.8 d29, d8, d9, #2 - vext.8 d30, d10, d11, #2 - vext.8 d31, d12, d13, #2 - - vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d29, d2 - vmlal.u8 q9, d30, d2 - vmlal.u8 q10, d31, d2 - - vext.8 d28, d6, d7, #5 ;construct src_ptr[3] - vext.8 d29, d8, d9, #5 - vext.8 d30, d10, d11, #5 - vext.8 d31, d12, d13, #5 - - vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q8, d29, d5 - vmlal.u8 q9, d30, d5 - vmlal.u8 q10, d31, d5 - - vext.8 d28, d6, d7, #3 ;construct src_ptr[1] - vext.8 d29, d8, d9, #3 - vext.8 d30, d10, d11, #3 - vext.8 d31, d12, d13, #3 - - vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q4, d29, d3 - vmull.u8 q5, d30, d3 - vmull.u8 q6, d31, d3 - - subs r2, r2, #1 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vld1.u8 {q3}, [r0], r1 ;load src data - - vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d23, q8, #7 - vqrshrun.s16 d24, q9, #7 - vqrshrun.s16 d25, q10, #7 - - vst1.u8 {d22}, [lr]! ;store result - vld1.u8 {q4}, [r0], r1 - vst1.u8 {d23}, [lr]! - vld1.u8 {q5}, [r0], r1 - vst1.u8 {d24}, [lr]! 
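Throughout the first pass, vext.8 dX, dLo, dHi, #n carves the n-bytes-shifted 8-byte window out of the 16 bytes loaded for each row, so src_ptr[-1] through src_ptr[3] each become a whole register that a single vmlsl/vmlal can consume. In C terms (a sketch, names illustrative):

    /* vext.8 out, lo, hi, #n == bytes n..n+7 of the 16-byte pair {lo, hi} */
    static void vext8_ref(unsigned char out[8], const unsigned char lo[8],
                          const unsigned char hi[8], int n) {
      unsigned char pair[16];
      int i;
      for (i = 0; i < 8; ++i) { pair[i] = lo[i]; pair[8 + i] = hi[i]; }
      for (i = 0; i < 8; ++i) out[i] = pair[n + i];
    }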
- vld1.u8 {q6}, [r0], r1 - vst1.u8 {d25}, [lr]! - - bne filt_blk2d_fp8x8_loop_neon - - ;first_pass filtering on the rest 5-line data - ;vld1.u8 {q3}, [r0], r1 ;load src data - ;vld1.u8 {q4}, [r0], r1 - ;vld1.u8 {q5}, [r0], r1 - ;vld1.u8 {q6}, [r0], r1 - vld1.u8 {q7}, [r0], r1 - - vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q9, d8, d0 - vmull.u8 q10, d10, d0 - vmull.u8 q11, d12, d0 - vmull.u8 q12, d14, d0 - - vext.8 d27, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d28, d8, d9, #1 - vext.8 d29, d10, d11, #1 - vext.8 d30, d12, d13, #1 - vext.8 d31, d14, d15, #1 - - vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q9, d28, d1 - vmlsl.u8 q10, d29, d1 - vmlsl.u8 q11, d30, d1 - vmlsl.u8 q12, d31, d1 - - vext.8 d27, d6, d7, #4 ;construct src_ptr[2] - vext.8 d28, d8, d9, #4 - vext.8 d29, d10, d11, #4 - vext.8 d30, d12, d13, #4 - vext.8 d31, d14, d15, #4 - - vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q9, d28, d4 - vmlsl.u8 q10, d29, d4 - vmlsl.u8 q11, d30, d4 - vmlsl.u8 q12, d31, d4 - - vext.8 d27, d6, d7, #2 ;construct src_ptr[0] - vext.8 d28, d8, d9, #2 - vext.8 d29, d10, d11, #2 - vext.8 d30, d12, d13, #2 - vext.8 d31, d14, d15, #2 - - vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q9, d28, d2 - vmlal.u8 q10, d29, d2 - vmlal.u8 q11, d30, d2 - vmlal.u8 q12, d31, d2 - - vext.8 d27, d6, d7, #5 ;construct src_ptr[3] - vext.8 d28, d8, d9, #5 - vext.8 d29, d10, d11, #5 - vext.8 d30, d12, d13, #5 - vext.8 d31, d14, d15, #5 - - vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q9, d28, d5 - vmlal.u8 q10, d29, d5 - vmlal.u8 q11, d30, d5 - vmlal.u8 q12, d31, d5 - - vext.8 d27, d6, d7, #3 ;construct src_ptr[1] - vext.8 d28, d8, d9, #3 - vext.8 d29, d10, d11, #3 - vext.8 d30, d12, d13, #3 - vext.8 d31, d14, d15, #3 - - vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q4, d28, d3 - vmull.u8 q5, d29, d3 - vmull.u8 q6, d30, d3 - vmull.u8 q7, d31, d3 - - vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q9, q4 - vqadd.s16 q10, q5 - vqadd.s16 q11, q6 - vqadd.s16 q12, q7 - - add r3, r12, r3, lsl #5 - - vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8 - sub lr, lr, #64 - vqrshrun.s16 d27, q9, #7 - vld1.u8 {q9}, [lr]! ;load intermediate data from stack - vqrshrun.s16 d28, q10, #7 - vld1.u8 {q10}, [lr]! - - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - - vqrshrun.s16 d29, q11, #7 - vld1.u8 {q11}, [lr]! - - vabs.s32 q7, q5 - vabs.s32 q8, q6 - - vqrshrun.s16 d30, q12, #7 - vld1.u8 {q12}, [lr]! 
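The second-pass loop below runs twice: each iteration turns the top of a 13-row window (8 rows reloaded from the stack, 5 still live in d26-d30) into 4 output rows, then the trailing block of vmov instructions slides the window down by 4 rows. One iteration, modelled in scalar C (array shape and names illustrative):

    #include <string.h>

    /* Produce 4 rows from rows[0..8] of a 13-row window, then slide it. */
    static void vpass_step(unsigned char rows[13][8], unsigned char *dst,
                           int dst_stride, const short *vf /* 6 taps */) {
      int r, c, t;
      for (r = 0; r < 4; ++r, dst += dst_stride)
        for (c = 0; c < 8; ++c) {
          int sum = 0;
          for (t = 0; t < 6; ++t) sum += rows[r + t][c] * vf[t];
          sum = (sum + 64) >> 7;                    /* vqrshrun.s16 #7 */
          dst[c] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
        }
      /* the vmov q9,q11 ... vmov d26,d30 slide: rows 4..12 -> rows 0..8 */
      memmove(rows[0], rows[4], 9 * sizeof rows[0]);
    }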
- -;Second pass: 8x8 - mov r3, #2 ;loop counter - - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vdup.8 d1, d14[4] - vdup.8 d2, d15[0] - vdup.8 d3, d15[4] - vdup.8 d4, d16[0] - vdup.8 d5, d16[4] - -filt_blk2d_sp8x8_loop_neon - vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d19, d0 - vmull.u8 q5, d20, d0 - vmull.u8 q6, d21, d0 - - vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q4, d20, d1 - vmlsl.u8 q5, d21, d1 - vmlsl.u8 q6, d22, d1 - - vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d23, d4 - vmlsl.u8 q5, d24, d4 - vmlsl.u8 q6, d25, d4 - - vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d21, d2 - vmlal.u8 q5, d22, d2 - vmlal.u8 q6, d23, d2 - - vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q4, d24, d5 - vmlal.u8 q5, d25, d5 - vmlal.u8 q6, d26, d5 - - vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q8, d22, d3 - vmull.u8 q9, d23, d3 - vmull.u8 q10, d24, d3 - - subs r3, r3, #1 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q8, #7 - vqrshrun.s16 d8, q9, #7 - vqrshrun.s16 d9, q10, #7 - - vmov q9, q11 - vst1.u8 {d6}, [r4], r5 ;store result - vmov q10, q12 - vst1.u8 {d7}, [r4], r5 - vmov q11, q13 - vst1.u8 {d8}, [r4], r5 - vmov q12, q14 - vst1.u8 {d9}, [r4], r5 - vmov d26, d30 - - bne filt_blk2d_sp8x8_loop_neon - - add sp, sp, #64 - pop {r4-r5,pc} - -;--------------------- -firstpass_filter8x8_only - ;add r2, r12, r2, lsl #5 ;calculate filter location - ;vld1.s32 {q14, q15}, [r2] ;load first_pass filter - vabs.s32 q12, q14 - vabs.s32 q13, q15 - - mov r2, #2 ;loop counter - sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) - - vdup.8 d0, d24[0] ;first_pass filter (d0-d5) - vdup.8 d1, d24[4] - vdup.8 d2, d25[0] - vdup.8 d3, d25[4] - vdup.8 d4, d26[0] - vdup.8 d5, d26[4] - -;First pass: output_height lines x output_width columns (8x8) -filt_blk2d_fpo8x8_loop_neon - vld1.u8 {q3}, [r0], r1 ;load src data - vld1.u8 {q4}, [r0], r1 - vld1.u8 {q5}, [r0], r1 - vld1.u8 {q6}, [r0], r1 - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q8, d8, d0 - vmull.u8 q9, d10, d0 - vmull.u8 q10, d12, d0 - - vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] - vext.8 d29, d8, d9, #1 - vext.8 d30, d10, d11, #1 - vext.8 d31, d12, d13, #1 - - vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q8, d29, d1 - vmlsl.u8 q9, d30, d1 - vmlsl.u8 q10, d31, d1 - - vext.8 d28, d6, d7, #4 ;construct src_ptr[2] - vext.8 d29, d8, d9, #4 - vext.8 d30, d10, d11, #4 - vext.8 d31, d12, d13, #4 - - vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q8, d29, d4 - vmlsl.u8 q9, d30, d4 - vmlsl.u8 q10, d31, d4 - - vext.8 d28, d6, d7, #2 ;construct src_ptr[0] - vext.8 d29, d8, d9, #2 - vext.8 d30, d10, d11, #2 - vext.8 d31, d12, d13, #2 - - vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q8, d29, d2 - vmlal.u8 q9, d30, d2 - vmlal.u8 q10, d31, d2 - - vext.8 d28, d6, d7, #5 ;construct src_ptr[3] - vext.8 d29, d8, d9, #5 - vext.8 d30, d10, d11, #5 - vext.8 d31, d12, d13, #5 - - vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q8, d29, d5 - vmlal.u8 q9, d30, d5 - vmlal.u8 q10, d31, d5 - - vext.8 d28, d6, d7, #3 ;construct src_ptr[1] - vext.8 d29, d8, d9, #3 - vext.8 d30, d10, d11, #3 - vext.8 d31, d12, d13, #3 - - vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp9_filter[3]) - 
vmull.u8 q4, d29, d3 - vmull.u8 q5, d30, d3 - vmull.u8 q6, d31, d3 - ; - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - subs r2, r2, #1 - - vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d23, q8, #7 - vqrshrun.s16 d24, q9, #7 - vqrshrun.s16 d25, q10, #7 - - vst1.u8 {d22}, [r4], r5 ;store result - vst1.u8 {d23}, [r4], r5 - vst1.u8 {d24}, [r4], r5 - vst1.u8 {d25}, [r4], r5 - - bne filt_blk2d_fpo8x8_loop_neon - - pop {r4-r5,pc} - -;--------------------- -secondpass_filter8x8_only - sub r0, r0, r1, lsl #1 - add r3, r12, r3, lsl #5 - - vld1.u8 {d18}, [r0], r1 ;load src data - vld1.s32 {q5, q6}, [r3] ;load second_pass filter - vld1.u8 {d19}, [r0], r1 - vabs.s32 q7, q5 - vld1.u8 {d20}, [r0], r1 - vabs.s32 q8, q6 - vld1.u8 {d21}, [r0], r1 - mov r3, #2 ;loop counter - vld1.u8 {d22}, [r0], r1 - vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) - vld1.u8 {d23}, [r0], r1 - vdup.8 d1, d14[4] - vld1.u8 {d24}, [r0], r1 - vdup.8 d2, d15[0] - vld1.u8 {d25}, [r0], r1 - vdup.8 d3, d15[4] - vld1.u8 {d26}, [r0], r1 - vdup.8 d4, d16[0] - vld1.u8 {d27}, [r0], r1 - vdup.8 d5, d16[4] - vld1.u8 {d28}, [r0], r1 - vld1.u8 {d29}, [r0], r1 - vld1.u8 {d30}, [r0], r1 - -;Second pass: 8x8 -filt_blk2d_spo8x8_loop_neon - vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp9_filter[0]) - vmull.u8 q4, d19, d0 - vmull.u8 q5, d20, d0 - vmull.u8 q6, d21, d0 - - vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp9_filter[1]) - vmlsl.u8 q4, d20, d1 - vmlsl.u8 q5, d21, d1 - vmlsl.u8 q6, d22, d1 - - vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp9_filter[4]) - vmlsl.u8 q4, d23, d4 - vmlsl.u8 q5, d24, d4 - vmlsl.u8 q6, d25, d4 - - vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp9_filter[2]) - vmlal.u8 q4, d21, d2 - vmlal.u8 q5, d22, d2 - vmlal.u8 q6, d23, d2 - - vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp9_filter[5]) - vmlal.u8 q4, d24, d5 - vmlal.u8 q5, d25, d5 - vmlal.u8 q6, d26, d5 - - vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp9_filter[3]) - vmull.u8 q8, d22, d3 - vmull.u8 q9, d23, d3 - vmull.u8 q10, d24, d3 - - subs r3, r3, #1 - - vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) - vqadd.s16 q8, q4 - vqadd.s16 q9, q5 - vqadd.s16 q10, q6 - - vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 - vqrshrun.s16 d7, q8, #7 - vqrshrun.s16 d8, q9, #7 - vqrshrun.s16 d9, q10, #7 - - vmov q9, q11 - vst1.u8 {d6}, [r4], r5 ;store result - vmov q10, q12 - vst1.u8 {d7}, [r4], r5 - vmov q11, q13 - vst1.u8 {d8}, [r4], r5 - vmov q12, q14 - vst1.u8 {d9}, [r4], r5 - vmov d26, d30 - - bne filt_blk2d_spo8x8_loop_neon - - pop {r4-r5,pc} - - ENDP - -;----------------- - - END diff --git a/vp9/common/arm/vp9_arm_systemdependent.c b/vp9/common/arm/vp9_arm_systemdependent.c deleted file mode 100644 index a6319a4c5..000000000 --- a/vp9/common/arm/vp9_arm_systemdependent.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include "vpx_ports/arm.h" -#include "vp9/common/vp9_pragmas.h" -#include "vp9/common/vp9_subpixel.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/recon.h" -#include "vp9/common/vp9_onyxc_int.h" - -void vp9_arch_arm_common_init(VP9_COMMON *ctx) { -#if CONFIG_RUNTIME_CPU_DETECT - VP9_COMMON_RTCD *rtcd = &ctx->rtcd; - int flags = arm_cpu_caps(); - rtcd->flags = flags; - - /* Override default functions with fastest ones for this CPU. */ -#if HAVE_ARMV5TE - if (flags & HAS_EDSP) { - } -#endif - -// The commented functions need to be re-written for vpx. -#if HAVE_ARMV6 - if (flags & HAS_MEDIA) { - rtcd->subpix.sixtap16x16 = vp9_sixtap_predict16x16_armv6; - rtcd->subpix.sixtap8x8 = vp9_sixtap_predict8x8_armv6; - rtcd->subpix.sixtap8x4 = vp9_sixtap_predict8x4_armv6; - rtcd->subpix.sixtap4x4 = vp9_sixtap_predict_armv6; - - rtcd->subpix.bilinear16x16 = vp9_bilinear_predict16x16_armv6; - rtcd->subpix.bilinear8x8 = vp9_bilinear_predict8x8_armv6; - rtcd->subpix.bilinear8x4 = vp9_bilinear_predict8x4_armv6; - rtcd->subpix.bilinear4x4 = vp9_bilinear_predict4x4_armv6; - - // rtcd->idct.idct1 = vp9_short_idct4x4llm_1_v6; - // rtcd->idct.idct16 = vp9_short_idct4x4llm_v6_dual; - // rtcd->idct.iwalsh1 = vp9_short_inv_walsh4x4_1_v6; - // rtcd->idct.iwalsh16 = vp9_short_inv_walsh4x4_v6; - - rtcd->recon.copy16x16 = vp9_copy_mem16x16_v6; - rtcd->recon.copy8x8 = vp9_copy_mem8x8_v6; - rtcd->recon.copy8x4 = vp9_copy_mem8x4_v6; - rtcd->recon.recon = vp9_recon_b_armv6; - rtcd->recon.recon2 = vp9_recon2b_armv6; - rtcd->recon.recon4 = vp9_recon4b_armv6; - } -#endif - -#if HAVE_ARMV7 - if (flags & HAS_NEON) { - rtcd->subpix.sixtap16x16 = vp9_sixtap_predict16x16_neon; - rtcd->subpix.sixtap8x8 = vp9_sixtap_predict8x8_neon; - rtcd->subpix.sixtap8x4 = vp9_sixtap_predict8x4_neon; - rtcd->subpix.sixtap4x4 = vp9_sixtap_predict_neon; - - rtcd->subpix.bilinear16x16 = vp9_bilinear_predict16x16_neon; - rtcd->subpix.bilinear8x8 = vp9_bilinear_predict8x8_neon; - rtcd->subpix.bilinear8x4 = vp9_bilinear_predict8x4_neon; - rtcd->subpix.bilinear4x4 = vp9_bilinear_predict4x4_neon; - - // rtcd->idct.idct1 = vp9_short_idct4x4llm_1_neon; - // rtcd->idct.idct16 = vp9_short_idct4x4llm_neon; - // rtcd->idct.iwalsh1 = vp9_short_inv_walsh4x4_1_neon; - // rtcd->idct.iwalsh16 = vp9_short_inv_walsh4x4_neon; - - rtcd->recon.copy16x16 = vp9_copy_mem16x16_neon; - rtcd->recon.copy8x8 = vp9_copy_mem8x8_neon; - rtcd->recon.copy8x4 = vp9_copy_mem8x4_neon; - rtcd->recon.recon = vp9_recon_b_neon; - rtcd->recon.recon2 = vp9_recon2b_neon; - rtcd->recon.recon4 = vp9_recon4b_neon; - rtcd->recon.recon_mb = vp9_recon_mb_neon; - rtcd->recon.build_intra_predictors_mby = - vp9_build_intra_predictors_mby_neon; - rtcd->recon.build_intra_predictors_mby_s = - vp9_build_intra_predictors_mby_s_neon; - } -#endif - -#endif -} diff --git a/vp9/common/arm/vp9_bilinearfilter_arm.c b/vp9/common/arm/vp9_bilinearfilter_arm.c deleted file mode 100644 index 678173141..000000000 --- a/vp9/common/arm/vp9_bilinearfilter_arm.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
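vp9_arch_arm_common_init, removed above, is the hand-rolled ancestor of the generated RTCD tables: probe the CPU once, then overwrite scalar defaults with the fastest variant the capability flags permit. Reduced to a sketch, with the flag bits, names and the NEON stand-in all illustrative:

    typedef void (*copy_fn)(const unsigned char *src, int src_pitch,
                            unsigned char *dst, int dst_pitch);

    static void copy16x16_c(const unsigned char *s, int sp,
                            unsigned char *d, int dp) {
      int r, c;
      for (r = 0; r < 16; ++r, s += sp, d += dp)
        for (c = 0; c < 16; ++c) d[c] = s[c];
    }

    /* stand-in; a real build would bind the NEON asm symbol instead */
    static void copy16x16_neon(const unsigned char *s, int sp,
                               unsigned char *d, int dp) {
      copy16x16_c(s, sp, d, dp);
    }

    enum { HAS_MEDIA = 1 << 0, HAS_NEON = 1 << 1 }; /* illustrative bits */

    static copy_fn copy16x16 = copy16x16_c;         /* safe scalar default */

    static void dispatch_init(int cpu_flags) {
      if (cpu_flags & HAS_NEON)      /* strongest available variant wins */
        copy16x16 = copy16x16_neon;
    }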
- */ - - -#include -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_subpixel.h" -#include "vp9/common/arm/vp9_bilinearfilter_arm.h" - -void vp9_filter_block2d_bil_armv6 -( - unsigned char *src_ptr, - unsigned char *dst_ptr, - unsigned int src_pitch, - unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, - int Width, - int Height -) { - unsigned short FData[36 * 16]; /* Temp data buffer used in filtering */ - - /* First filter 1-D horizontally... */ - vp9_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); - - /* then 1-D vertically... */ - vp9_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter); -} - - -void vp9_bilinear_predict4x4_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); -} - -void vp9_bilinear_predict8x8_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); -} - -void vp9_bilinear_predict8x4_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); -} - -void vp9_bilinear_predict16x16_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); -} diff --git a/vp9/common/arm/vp9_bilinearfilter_arm.h b/vp9/common/arm/vp9_bilinearfilter_arm.h deleted file mode 100644 index 422691e44..000000000 --- a/vp9/common/arm/vp9_bilinearfilter_arm.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
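The bilinear path above is a lighter two-pass pipeline: the horizontal pass needs only Height + 1 source rows (two taps, not six) and writes 16-bit intermediates to FData before the vertical pass. A scalar sketch, assuming each pass rounds as the FData type suggests; buffer size and names are illustrative. vp8's two bilinear taps are non-negative and sum to 128, so no saturation is needed:

    static void bil_2d_ref(const unsigned char *src, int src_stride,
                           unsigned char *dst, int dst_stride,
                           const short *hf, const short *vf, /* 2 taps */
                           int w, int h) {
      unsigned short tmp[17 * 16];   /* (h + 1) x w first-pass output */
      int r, c;

      for (r = 0; r < h + 1; ++r, src += src_stride)
        for (c = 0; c < w; ++c)
          tmp[r * w + c] = (unsigned short)
              ((src[c] * hf[0] + src[c + 1] * hf[1] + 64) >> 7);

      for (r = 0; r < h; ++r, dst += dst_stride)
        for (c = 0; c < w; ++c)
          dst[c] = (unsigned char)
              ((tmp[r * w + c] * vf[0] + tmp[(r + 1) * w + c] * vf[1] + 64)
               >> 7);
    }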
- */ - - -#ifndef VP9_COMMON_ARM_VP9_BILINEARFILTER_ARM_H_ -#define VP9_COMMON_ARM_VP9_BILINEARFILTER_ARM_H_ - -extern void vp9_filter_block2d_bil_first_pass_armv6 -( - const unsigned char *src_ptr, - unsigned short *dst_ptr, - unsigned int src_pitch, - unsigned int height, - unsigned int width, - const short *vp9_filter -); - -extern void vp9_filter_block2d_bil_second_pass_armv6 -( - const unsigned short *src_ptr, - unsigned char *dst_ptr, - int dst_pitch, - unsigned int height, - unsigned int width, - const short *vp9_filter -); - -#endif /* BILINEARFILTER_ARM_H */ diff --git a/vp9/common/arm/vp9_filter_arm.c b/vp9/common/arm/vp9_filter_arm.c deleted file mode 100644 index f55273c33..000000000 --- a/vp9/common/arm/vp9_filter_arm.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_subpixel.h" -#include "vpx_ports/mem.h" - -extern void vp9_filter_block2d_first_pass_armv6 -( - unsigned char *src_ptr, - short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_width, - unsigned int output_height, - const short *vp9_filter -); - -// 8x8 -extern void vp9_filter_block2d_first_pass_8x8_armv6 -( - unsigned char *src_ptr, - short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_width, - unsigned int output_height, - const short *vp9_filter -); - -// 16x16 -extern void vp9_filter_block2d_first_pass_16x16_armv6 -( - unsigned char *src_ptr, - short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_width, - unsigned int output_height, - const short *vp9_filter -); - -extern void vp9_filter_block2d_second_pass_armv6 -( - short *src_ptr, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int cnt, - const short *vp9_filter -); - -extern void vp9_filter4_block2d_second_pass_armv6 -( - short *src_ptr, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int cnt, - const short *vp9_filter -); - -extern void vp9_filter_block2d_first_pass_only_armv6 -( - unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - unsigned int cnt, - unsigned int output_pitch, - const short *vp9_filter -); - - -extern void vp9_filter_block2d_second_pass_only_armv6 -( - unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - unsigned int cnt, - unsigned int output_pitch, - const short *vp9_filter -); - -#if HAVE_ARMV6 -void vp9_sixtap_predict_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 12 * 4); /* Temp data buffer used in filtering */ - - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* Vfilter is null. 
First pass only */ - if (xoffset && !yoffset) { - /*vp9_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter ); - vp9_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/ - - vp9_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter); - } - /* Hfilter is null. Second pass only */ - else if (!xoffset && yoffset) { - vp9_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter); - } else { - /* Vfilter is a 4 tap filter */ - if (yoffset & 0x1) { - vp9_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter); - vp9_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter); - } - /* Vfilter is 6 tap filter */ - else { - vp9_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter); - vp9_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter); - } - } -} - -void vp9_sixtap_predict8x8_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 16 * 8); /* Temp data buffer used in filtering */ - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - if (xoffset && !yoffset) { - vp9_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter); - } - /* Hfilter is null. Second pass only */ - else if (!xoffset && yoffset) { - vp9_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter); - } else { - if (yoffset & 0x1) { - vp9_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter); - vp9_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter); - } else { - vp9_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter); - vp9_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter); - } - } -} - - -void vp9_sixtap_predict16x16_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 24 * 16); /* Temp data buffer used in filtering */ - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - if (xoffset && !yoffset) { - vp9_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter); - } - /* Hfilter is null. 
Second pass only */ - else if (!xoffset && yoffset) { - vp9_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter); - } else { - if (yoffset & 0x1) { - vp9_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter); - vp9_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter); - } else { - vp9_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter); - vp9_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter); - } - } - -} -#endif diff --git a/vp9/common/arm/vp9_idct_arm.h b/vp9/common/arm/vp9_idct_arm.h deleted file mode 100644 index 8112ab913..000000000 --- a/vp9/common/arm/vp9_idct_arm.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_ARM_VP9_IDCT_ARM_H_ -#define VP9_COMMON_ARM_VP9_IDCT_ARM_H_ - -#if HAVE_ARMV6 -extern prototype_idct(vp9_short_idct4x4llm_1_v6); -extern prototype_idct(vp9_short_idct4x4llm_v6_dual); -extern prototype_idct_scalar_add(vp9_dc_only_idct_add_v6); -extern prototype_second_order(vp9_short_inv_walsh4x4_1_v6); -extern prototype_second_order(vp9_short_inv_walsh4x4_v6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_idct_idct1 -#define vp9_idct_idct1 vp9_short_idct4x4llm_1_v6 - -#undef vp9_idct_idct16 -#define vp9_idct_idct16 vp9_short_idct4x4llm_v6_dual - -#undef vp9_idct_idct1_scalar_add -#define vp9_idct_idct1_scalar_add vp9_dc_only_idct_add_v6 - -#undef vp8_idct_iwalsh1 -#define vp8_idct_iwalsh1 vp9_short_inv_walsh4x4_1_v6 - -#undef vp8_idct_iwalsh16 -#define vp8_idct_iwalsh16 vp9_short_inv_walsh4x4_v6 -#endif -#endif - -#if HAVE_ARMV7 -extern prototype_idct(vp9_short_idct4x4llm_1_neon); -extern prototype_idct(vp9_short_idct4x4llm_neon); -extern prototype_idct_scalar_add(vp9_dc_only_idct_add_neon); -extern prototype_second_order(vp9_short_inv_walsh4x4_1_neon); -extern prototype_second_order(vp9_short_inv_walsh4x4_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_idct_idct1 -#define vp9_idct_idct1 vp9_short_idct4x4llm_1_neon - -#undef vp9_idct_idct16 -#define vp9_idct_idct16 vp9_short_idct4x4llm_neon - -#undef vp9_idct_idct1_scalar_add -#define vp9_idct_idct1_scalar_add vp9_dc_only_idct_add_neon - -#undef vp8_idct_iwalsh1 -#define vp8_idct_iwalsh1 vp9_short_inv_walsh4x4_1_neon - -#undef vp8_idct_iwalsh16 -#define vp8_idct_iwalsh16 vp9_short_inv_walsh4x4_neon -#endif -#endif - -#endif diff --git a/vp9/common/arm/vp9_loopfilter_arm.c b/vp9/common/arm/vp9_loopfilter_arm.c deleted file mode 100644 index b61f1a86b..000000000 --- a/vp9/common/arm/vp9_loopfilter_arm.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
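The `yoffset & 0x1` tests above pick between two second passes because of a property of the coefficient table: every odd sub-pel offset has zeros in both outer taps, so a 4-tap vertical pass over fewer intermediate rows suffices (7/11/19 first-pass rows instead of 9/13/21 for the 4x4/8x8/16x16 cases). The table, transcribed from filter8_coeff:

    /* six-tap sub-pel filters; every row sums to 128 */
    static const short sixtap[8][6] = {
      { 0,   0, 128,   0,   0,  0 },  /* offset 0: pure copy          */
      { 0,  -6, 123,  12,  -1,  0 },  /* odd offsets: outer taps zero */
      { 2, -11, 108,  36,  -8,  1 },
      { 0,  -9,  93,  50,  -6,  0 },
      { 3, -16,  77,  77, -16,  3 },
      { 0,  -6,  50,  93,  -9,  0 },
      { 1,  -8,  36, 108, -11,  2 },
      { 0,  -1,  12, 123,  -6,  0 },
    };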
- */ - - -#include "vpx_config.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" - -#if HAVE_ARMV6 -extern prototype_loopfilter(vp9_loop_filter_horizontal_edge_armv6); -extern prototype_loopfilter(vp9_loop_filter_vertical_edge_armv6); -extern prototype_loopfilter(vp9_mbloop_filter_horizontal_edge_armv6); -extern prototype_loopfilter(vp9_mbloop_filter_vertical_edge_armv6); -#endif - -#if HAVE_ARMV7 -typedef void loopfilter_y_neon(unsigned char *src, int pitch, - unsigned char blimit, unsigned char limit, unsigned char thresh); -typedef void loopfilter_uv_neon(unsigned char *u, int pitch, - unsigned char blimit, unsigned char limit, unsigned char thresh, - unsigned char *v); - -extern loopfilter_y_neon vp9_loop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp9_loop_filter_vertical_edge_y_neon; -extern loopfilter_y_neon vp9_mbloop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp9_mbloop_filter_vertical_edge_y_neon; - -extern loopfilter_uv_neon vp9_loop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp9_loop_filter_vertical_edge_uv_neon; -extern loopfilter_uv_neon vp9_mbloop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp9_mbloop_filter_vertical_edge_uv_neon; -#endif - -#if HAVE_ARMV6 -/*ARMV6 loopfilter functions*/ -/* Horizontal MB filtering */ -void vp9_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - vp9_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Vertical MB Filtering */ -void vp9_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - vp9_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal B Filtering */ -void vp9_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - vp9_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp9_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit); - vp9_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit); -} - -/* Vertical B Filtering */ -void 
vp9_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - vp9_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp9_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp9_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp9_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp9_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) { - vp9_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit); - vp9_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit); -} -#endif - -#if HAVE_ARMV7 -/* NEON loopfilter functions */ -/* Horizontal MB filtering */ -void vp9_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - unsigned char mblim = *lfi->mblim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - vp9_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); - - if (u_ptr) - vp9_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); -} - -/* Vertical MB Filtering */ -void vp9_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - unsigned char mblim = *lfi->mblim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp9_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); - - if (u_ptr) - vp9_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); -} - -/* Horizontal B Filtering */ -void vp9_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - unsigned char blim = *lfi->blim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp9_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr); - vp9_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr); - vp9_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr); - - if (u_ptr) - vp9_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride); -} - -/* Vertical B Filtering */ -void vp9_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) { - unsigned char blim = *lfi->blim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp9_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr); - vp9_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr); - vp9_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr); - - if (u_ptr) - vp9_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4); -} -#endif diff --git a/vp9/common/arm/vp9_loopfilter_arm.h b/vp9/common/arm/vp9_loopfilter_arm.h deleted file mode 100644 index 4f12ff31e..000000000 --- 
a/vp9/common/arm/vp9_loopfilter_arm.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_ARM_VP9_LOOPFILTER_ARM_H_ -#define VP9_COMMON_ARM_VP9_LOOPFILTER_ARM_H_ - -#include "vpx_config.h" - -#if HAVE_ARMV6 -extern prototype_loopfilter_block(vp9_loop_filter_mbv_armv6); -extern prototype_loopfilter_block(vp9_loop_filter_bv_armv6); -extern prototype_loopfilter_block(vp9_loop_filter_mbh_armv6); -extern prototype_loopfilter_block(vp9_loop_filter_bh_armv6); -extern prototype_simple_loopfilter(vp9_loop_filter_bvs_armv6); -extern prototype_simple_loopfilter(vp9_loop_filter_bhs_armv6); -extern prototype_simple_loopfilter(vp9_loop_filter_simple_horizontal_edge_armv6); -extern prototype_simple_loopfilter(vp9_loop_filter_simple_vertical_edge_armv6); - -#endif /* HAVE_ARMV6 */ - -#if HAVE_ARMV7 -extern prototype_loopfilter_block(vp9_loop_filter_mbv_neon); -extern prototype_loopfilter_block(vp9_loop_filter_bv_neon); -extern prototype_loopfilter_block(vp9_loop_filter_mbh_neon); -extern prototype_loopfilter_block(vp9_loop_filter_bh_neon); -extern prototype_simple_loopfilter(vp9_loop_filter_mbvs_neon); -extern prototype_simple_loopfilter(vp9_loop_filter_bvs_neon); -extern prototype_simple_loopfilter(vp9_loop_filter_mbhs_neon); -extern prototype_simple_loopfilter(vp9_loop_filter_bhs_neon); - -#endif /* HAVE_ARMV7 */ - -#endif /* LOOPFILTER_ARM_H */ diff --git a/vp9/common/arm/vp9_recon_arm.h b/vp9/common/arm/vp9_recon_arm.h deleted file mode 100644 index 788385272..000000000 --- a/vp9/common/arm/vp9_recon_arm.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
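The deleted loop-filter wrappers and prototypes above hard-code the macroblock's edge geometry: the MB edge is filtered at the block origin and the inner B edges at offsets 4, 8 and 12 (row offsets for horizontal edges, column offsets for vertical), with a count of 2 for the 16-pixel luma plane and 1 for the 8-pixel chroma planes. A sketch of that driver shape with the edge kernel left abstract (names illustrative):

    typedef void (*edge_fn)(unsigned char *p, int pitch,
                            const unsigned char *blim,
                            const unsigned char *lim,
                            const unsigned char *thresh, int count);

    /* Inner horizontal B edges of one macroblock. */
    static void filter_bh(edge_fn f, unsigned char *y, int y_stride,
                          unsigned char *u, unsigned char *v, int uv_stride,
                          const unsigned char *blim, const unsigned char *lim,
                          const unsigned char *thresh) {
      int off;
      for (off = 4; off <= 12; off += 4)           /* luma rows 4, 8, 12 */
        f(y + off * y_stride, y_stride, blim, lim, thresh, 2);
      if (u) f(u + 4 * uv_stride, uv_stride, blim, lim, thresh, 1);
      if (v) f(v + 4 * uv_stride, uv_stride, blim, lim, thresh, 1);
    }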
- */ - - -#ifndef VP9_COMMON_ARM_VP9_RECON_ARM_H_ -#define VP9_COMMON_ARM_VP9_RECON_ARM_H_ - -#if HAVE_ARMV6 -extern prototype_recon_block(vp9_recon_b_armv6); -extern prototype_recon_block(vp9_recon2b_armv6); -extern prototype_recon_block(vp9_recon4b_armv6); - -extern prototype_copy_block(vp9_copy_mem8x8_v6); -extern prototype_copy_block(vp9_copy_mem8x4_v6); -extern prototype_copy_block(vp9_copy_mem16x16_v6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_recon_recon -#define vp8_recon_recon vp9_recon_b_armv6 - -#undef vp8_recon_recon2 -#define vp8_recon_recon2 vp9_recon2b_armv6 - -#undef vp8_recon_recon4 -#define vp8_recon_recon4 vp9_recon4b_armv6 - -#undef vp8_recon_copy8x8 -#define vp8_recon_copy8x8 vp9_copy_mem8x8_v6 - -#undef vp8_recon_copy8x4 -#define vp8_recon_copy8x4 vp9_copy_mem8x4_v6 - -#undef vp8_recon_copy16x16 -#define vp8_recon_copy16x16 vp9_copy_mem16x16_v6 -#endif -#endif - -#if HAVE_ARMV7 -extern prototype_recon_block(vp9_recon_b_neon); -extern prototype_recon_block(vp9_recon2b_neon); -extern prototype_recon_block(vp9_recon4b_neon); - -extern prototype_copy_block(vp9_copy_mem8x8_neon); -extern prototype_copy_block(vp9_copy_mem8x4_neon); -extern prototype_copy_block(vp9_copy_mem16x16_neon); - -extern prototype_recon_macroblock(vp9_recon_mb_neon); - -extern prototype_build_intra_predictors(vp9_build_intra_predictors_mby_neon); -extern prototype_build_intra_predictors(vp9_build_intra_predictors_mby_s_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_recon_recon -#define vp8_recon_recon vp9_recon_b_neon - -#undef vp8_recon_recon2 -#define vp8_recon_recon2 vp9_recon2b_neon - -#undef vp8_recon_recon4 -#define vp8_recon_recon4 vp9_recon4b_neon - -#undef vp8_recon_copy8x8 -#define vp8_recon_copy8x8 vp9_copy_mem8x8_neon - -#undef vp8_recon_copy8x4 -#define vp8_recon_copy8x4 vp9_copy_mem8x4_neon - -#undef vp8_recon_copy16x16 -#define vp8_recon_copy16x16 vp9_copy_mem16x16_neon - -#undef vp8_recon_recon_mb -#define vp8_recon_recon_mb vp9_recon_mb_neon - -#undef vp9_recon_build_intra_predictors_mby -#define vp9_recon_build_intra_predictors_mby vp9_build_intra_predictors_mby_neon - -#undef vp9_recon_build_intra_predictors_mby_s -#define vp9_recon_build_intra_predictors_mby_s vp9_build_intra_predictors_mby_s_neon - -#endif -#endif - -#endif diff --git a/vp9/common/arm/vp9_reconintra_arm.c b/vp9/common/arm/vp9_reconintra_arm.c deleted file mode 100644 index 5720828c7..000000000 --- a/vp9/common/arm/vp9_reconintra_arm.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
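With CONFIG_RUNTIME_CPU_DETECT off, headers like the one above resolve dispatch at compile time instead: each generic hook is #undef'd and re-#defined to the single implementation the build targets, so call sites bind directly with no pointer table. The idiom, reduced (the macro names and the guard are illustrative):

    #define recon_copy16x16 copy16x16_c        /* generic default */

    #if defined(BUILD_FOR_NEON_ONLY)           /* illustrative guard */
    #undef  recon_copy16x16
    #define recon_copy16x16 copy16x16_neon
    #endif

    /* call sites written as recon_copy16x16(...) now bind statically */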
- */ - - -#include "vpx_ports/config.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_reconintra.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/recon.h" - -#if HAVE_ARMV7 -extern void vp9_build_intra_predictors_mby_neon_func( - unsigned char *y_buffer, - unsigned char *ypred_ptr, - int y_stride, - int mode, - int Up, - int Left); - -void vp9_build_intra_predictors_mby_neon(MACROBLOCKD *xd) { - unsigned char *y_buffer = xd->dst.y_buffer; - unsigned char *ypred_ptr = xd->predictor; - int y_stride = xd->dst.y_stride; - int mode = xd->mode_info_context->mbmi.mode; - int Up = xd->up_available; - int Left = xd->left_available; - - vp9_build_intra_predictors_mby_neon_func(y_buffer, ypred_ptr, - y_stride, mode, Up, Left); -} -#endif - - -#if HAVE_ARMV7 -extern void vp9_build_intra_predictors_mby_s_neon_func( - unsigned char *y_buffer, - unsigned char *ypred_ptr, - int y_stride, - int mode, - int Up, - int Left); - -void vp9_build_intra_predictors_mby_s_neon(MACROBLOCKD *xd) { - unsigned char *y_buffer = xd->dst.y_buffer; - unsigned char *ypred_ptr = xd->predictor; - int y_stride = xd->dst.y_stride; - int mode = xd->mode_info_context->mbmi.mode; - int Up = xd->up_available; - int Left = xd->left_available; - - vp9_build_intra_predictors_mby_s_neon_func(y_buffer, ypred_ptr, - y_stride, mode, Up, Left); -} - -#endif diff --git a/vp9/common/arm/vp9_subpixel_arm.h b/vp9/common/arm/vp9_subpixel_arm.h deleted file mode 100644 index efc7c1a5d..000000000 --- a/vp9/common/arm/vp9_subpixel_arm.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
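The reconintra wrappers above show the shim pattern used throughout these files: C unpacks the context struct into plain scalars so the asm kernel never has to know struct layout, which is what keeps such kernels independent of the asm-offsets machinery. Schematically (the struct and kernel are illustrative stand-ins):

    struct mb_ctx { unsigned char *dst; int stride, mode, up, left; };

    extern void intra_pred_kernel(unsigned char *dst, int stride,
                                  int mode, int up, int left);

    static void intra_pred(struct mb_ctx *x) {
      intra_pred_kernel(x->dst, x->stride, x->mode, x->up, x->left);
    }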
- */ - - -#ifndef VP9_COMMON_ARM_VP9_SUBPIXEL_ARM_H_ -#define VP9_COMMON_ARM_VP9_SUBPIXEL_ARM_H_ - -#if HAVE_ARMV6 -extern prototype_subpixel_predict(vp9_sixtap_predict16x16_armv6); -extern prototype_subpixel_predict(vp9_sixtap_predict8x8_armv6); -extern prototype_subpixel_predict(vp9_sixtap_predict8x4_armv6); -extern prototype_subpixel_predict(vp9_sixtap_predict_armv6); -extern prototype_subpixel_predict(vp9_bilinear_predict16x16_armv6); -extern prototype_subpixel_predict(vp9_bilinear_predict8x8_armv6); -extern prototype_subpixel_predict(vp9_bilinear_predict8x4_armv6); -extern prototype_subpixel_predict(vp9_bilinear_predict4x4_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_subpix_sixtap16x16 -#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_armv6 - -#undef vp9_subpix_sixtap8x8 -#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_armv6 - -#undef vp9_subpix_sixtap8x4 -#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_armv6 - -#undef vp9_subpix_sixtap4x4 -#define vp9_subpix_sixtap4x4 vp9_sixtap_predict_armv6 - -#undef vp9_subpix_bilinear16x16 -#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_armv6 - -#undef vp9_subpix_bilinear8x8 -#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_armv6 - -#undef vp9_subpix_bilinear8x4 -#define vp9_subpix_bilinear8x4 vp9_bilinear_predict8x4_armv6 - -#undef vp9_subpix_bilinear4x4 -#define vp9_subpix_bilinear4x4 vp9_bilinear_predict4x4_armv6 -#endif -#endif - -#if HAVE_ARMV7 -extern prototype_subpixel_predict(vp9_sixtap_predict16x16_neon); -extern prototype_subpixel_predict(vp9_sixtap_predict8x8_neon); -extern prototype_subpixel_predict(vp9_sixtap_predict8x4_neon); -extern prototype_subpixel_predict(vp9_sixtap_predict_neon); -extern prototype_subpixel_predict(vp9_bilinear_predict16x16_neon); -extern prototype_subpixel_predict(vp9_bilinear_predict8x8_neon); -extern prototype_subpixel_predict(vp9_bilinear_predict8x4_neon); -extern prototype_subpixel_predict(vp9_bilinear_predict4x4_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_subpix_sixtap16x16 -#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_neon - -#undef vp9_subpix_sixtap8x8 -#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_neon - -#undef vp9_subpix_sixtap8x4 -#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_neon - -#undef vp9_subpix_sixtap4x4 -#define vp9_subpix_sixtap4x4 vp9_sixtap_predict_neon - -#undef vp9_subpix_bilinear16x16 -#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_neon - -#undef vp9_subpix_bilinear8x8 -#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_neon - -#undef vp9_subpix_bilinear8x4 -#define vp9_subpix_bilinear8x4 vp9_bilinear_predict8x4_neon - -#undef vp9_subpix_bilinear4x4 -#define vp9_subpix_bilinear4x4 vp9_bilinear_predict4x4_neon -#endif -#endif - -#endif diff --git a/vp9/common/vp9_asm_com_offsets.c b/vp9/common/vp9_asm_com_offsets.c index 07d3e333a..94ccb6ebd 100644 --- a/vp9/common/vp9_asm_com_offsets.c +++ b/vp9/common/vp9_asm_com_offsets.c @@ -12,29 +12,10 @@ #include "vpx_config.h" #include "vpx/vpx_codec.h" #include "vpx_ports/asm_offsets.h" -#include "vpx_scale/yv12config.h" BEGIN -/* vpx_scale */ -DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); -DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); -DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); -DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); -DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); 
-DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); -DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); -DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); -DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); -DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); -DEFINE(VP9BORDERINPIXELS_VAL, VP9BORDERINPIXELS); - END /* add asserts for any offset that is not supported by assembly code */ /* add asserts for any size that is not supported by assembly code */ - -#if HAVE_ARMV7 -/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */ -ct_assert(VP9BORDERINPIXELS_VAL, VP9BORDERINPIXELS == 32) -#endif diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index 4e8fa78e2..23df2d86d 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -22,11 +22,7 @@ typedef enum { SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE; -#if ARCH_ARM -#define SIMD_WIDTH 1 -#else #define SIMD_WIDTH 16 -#endif /* Need to align this structure so when it is declared and * passed it can be loaded into vector registers. @@ -67,10 +63,6 @@ struct loop_filter_info { #include "x86/vp9_loopfilter_x86.h" #endif -#if ARCH_ARM -#include "arm/vp9_loopfilter_arm.h" -#endif - typedef void loop_filter_uvfunction(unsigned char *u, /* source pointer */ int p, /* pitch */ const unsigned char *blimit, diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 1c0ce16e1..ddc64886d 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -79,13 +79,11 @@ specialize vp9_dequant_idct_add_uv_block mmx # RECON # prototype void vp9_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp9_copy_mem16x16 mmx sse2 media neon dspr2 -vp9_copy_mem16x16_media=vp9_copy_mem16x16_v6 +specialize vp9_copy_mem16x16 mmx sse2 dspr2 vp9_copy_mem16x16_dspr2=vp9_copy_mem16x16_dspr2 prototype void vp9_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp9_copy_mem8x8 mmx media neon dspr2 -vp9_copy_mem8x8_media=vp9_copy_mem8x8_v6 +specialize vp9_copy_mem8x8 mmx dspr2 vp9_copy_mem8x8_dspr2=vp9_copy_mem8x8_dspr2 prototype void vp9_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" @@ -98,8 +96,7 @@ prototype void vp9_avg_mem8x8 "unsigned char *src, int src_pitch, unsigned char specialize vp9_avg_mem8x8 prototype void vp9_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp9_copy_mem8x4 mmx media neon dspr2 -vp9_copy_mem8x4_media=vp9_copy_mem8x4_v6 +specialize vp9_copy_mem8x4 mmx dspr2 vp9_copy_mem8x4_dspr2=vp9_copy_mem8x4_dspr2 prototype void vp9_recon_b "unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride" @@ -193,36 +190,28 @@ prototype void vp9_loop_filter_bh8x8 "unsigned char *y, unsigned char *u, unsign specialize vp9_loop_filter_bh8x8 sse2 prototype void vp9_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp9_loop_filter_simple_mbv mmx sse2 media neon +specialize vp9_loop_filter_simple_mbv mmx sse2 vp9_loop_filter_simple_mbv_c=vp9_loop_filter_simple_vertical_edge_c vp9_loop_filter_simple_mbv_mmx=vp9_loop_filter_simple_vertical_edge_mmx vp9_loop_filter_simple_mbv_sse2=vp9_loop_filter_simple_vertical_edge_sse2 -vp9_loop_filter_simple_mbv_media=vp9_loop_filter_simple_vertical_edge_armv6 
-vp9_loop_filter_simple_mbv_neon=vp9_loop_filter_mbvs_neon prototype void vp9_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp9_loop_filter_simple_mbh mmx sse2 media neon +specialize vp9_loop_filter_simple_mbh mmx sse2 vp9_loop_filter_simple_mbh_c=vp9_loop_filter_simple_horizontal_edge_c vp9_loop_filter_simple_mbh_mmx=vp9_loop_filter_simple_horizontal_edge_mmx vp9_loop_filter_simple_mbh_sse2=vp9_loop_filter_simple_horizontal_edge_sse2 -vp9_loop_filter_simple_mbh_media=vp9_loop_filter_simple_horizontal_edge_armv6 -vp9_loop_filter_simple_mbh_neon=vp9_loop_filter_mbhs_neon prototype void vp9_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp9_loop_filter_simple_bv mmx sse2 media neon +specialize vp9_loop_filter_simple_bv mmx sse2 vp9_loop_filter_simple_bv_c=vp9_loop_filter_bvs_c vp9_loop_filter_simple_bv_mmx=vp9_loop_filter_bvs_mmx vp9_loop_filter_simple_bv_sse2=vp9_loop_filter_bvs_sse2 -vp9_loop_filter_simple_bv_media=vp9_loop_filter_bvs_armv6 -vp9_loop_filter_simple_bv_neon=vp9_loop_filter_bvs_neon prototype void vp9_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp9_loop_filter_simple_bh mmx sse2 media neon +specialize vp9_loop_filter_simple_bh mmx sse2 vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2 -vp9_loop_filter_simple_bh_media=vp9_loop_filter_bhs_armv6 -vp9_loop_filter_simple_bh_neon=vp9_loop_filter_bhs_neon # # post proc @@ -683,7 +672,7 @@ prototype void vp9_temporal_filter_apply "unsigned char *frame1, unsigned int st specialize vp9_temporal_filter_apply sse2 prototype void vp9_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int fraction" -specialize vp9_yv12_copy_partial_frame neon +specialize vp9_yv12_copy_partial_frame fi @@ -716,11 +705,11 @@ if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then fi prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf" -specialize vp8_yv12_extend_frame_borders neon +specialize vp8_yv12_extend_frame_borders prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_frame neon +specialize vp8_yv12_copy_frame prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_y neon +specialize vp8_yv12_copy_y diff --git a/vp9/decoder/arm/armv6/vp9_dequant_dc_idct_v6.asm b/vp9/decoder/arm/armv6/vp9_dequant_dc_idct_v6.asm deleted file mode 100644 index 6bebda24f..000000000 --- a/vp9/decoder/arm/armv6/vp9_dequant_dc_idct_v6.asm +++ /dev/null @@ -1,218 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
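The "specialize" lines edited above feed libvpx's run-time CPU detection (RTCD) generator: each ISA named after a prototype is a candidate implementation, and the generated rtcd header binds the common name to the best variant the running CPU supports, so dropping media/neon here simply removes those candidates. A minimal C sketch of the dispatch pattern; the names, the flag value, and the stub bodies are assumptions for illustration, not the generated code:

#include <string.h>

/* Sketch only: stand-ins for the per-ISA variants a "specialize" line
 * declares. In the real tree the SSE2 body is assembly/intrinsics. */
#define HAS_SSE2 0x04  /* illustrative flag value; see vpx_ports for real ones */

static void copy_mem16x16_c(unsigned char *src, int src_pitch,
                            unsigned char *dst, int dst_pitch) {
  int r;
  for (r = 0; r < 16; ++r)
    memcpy(dst + r * dst_pitch, src + r * src_pitch, 16);
}

static void copy_mem16x16_sse2(unsigned char *src, int src_pitch,
                               unsigned char *dst, int dst_pitch) {
  copy_mem16x16_c(src, src_pitch, dst, dst_pitch);  /* placeholder body */
}

/* The common name is a function pointer, bound once at codec init. */
static void (*copy_mem16x16)(unsigned char *, int, unsigned char *, int);

static void setup_rtcd(int cpu_flags) {
  copy_mem16x16 = copy_mem16x16_c;       /* always-available default */
  if (cpu_flags & HAS_SSE2)
    copy_mem16x16 = copy_mem16x16_sse2;  /* best supported variant wins */
}
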
-; - - - EXPORT |vp8_dequant_dc_idct_add_v6| - - AREA |.text|, CODE, READONLY - -;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred, -; unsigned char *dest, int pitch, int stride, int Dc) -; r0 = input -; r1 = dq -; r2 = pred -; r3 = dest -; sp + 36 = pitch ; +4 = 40 -; sp + 40 = stride ; +4 = 44 -; sp + 44 = Dc ; +4 = 48 - - -|vp8_dequant_dc_idct_add_v6| PROC - stmdb sp!, {r4-r11, lr} - - ldr r6, [sp, #44] - - ldr r4, [r0] ;input - ldr r5, [r1], #4 ;dq - - sub sp, sp, #4 - str r3, [sp] - - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - mov r12, #3 - -vp8_dequant_dc_add_loop - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - subs r12, r12, #1 - - ldrne r4, [r0, #4] - ldrne r5, [r1], #4 - - strh r6, [r0], #2 - strh r7, [r0], #2 - - bne vp8_dequant_dc_add_loop - - sub r0, r0, #32 - mov r1, r0 - -; short_idct4x4llm_v6_dual - ldr r3, cospi8sqrt2minus1 - ldr r4, sinpi8sqrt2 - ldr r6, [r0, #8] - mov r5, #2 -vp8_dequant_dc_idct_loop1_v6 - ldr r12, [r0, #24] - ldr r14, [r0, #16] - smulwt r9, r3, r6 - smulwb r7, r3, r6 - smulwt r10, r4, r6 - smulwb r8, r4, r6 - pkhbt r7, r7, r9, lsl #16 - smulwt r11, r3, r12 - pkhbt r8, r8, r10, lsl #16 - uadd16 r6, r6, r7 - smulwt r7, r4, r12 - smulwb r9, r3, r12 - smulwb r10, r4, r12 - subs r5, r5, #1 - pkhbt r9, r9, r11, lsl #16 - ldr r11, [r0], #4 - pkhbt r10, r10, r7, lsl #16 - uadd16 r7, r12, r9 - usub16 r7, r8, r7 - uadd16 r6, r6, r10 - uadd16 r10, r11, r14 - usub16 r8, r11, r14 - uadd16 r9, r10, r6 - usub16 r10, r10, r6 - uadd16 r6, r8, r7 - usub16 r7, r8, r7 - str r6, [r1, #8] - ldrne r6, [r0, #8] - str r7, [r1, #16] - str r10, [r1, #24] - str r9, [r1], #4 - bne vp8_dequant_dc_idct_loop1_v6 - - mov r5, #2 - sub r0, r1, #8 -vp8_dequant_dc_idct_loop2_v6 - ldr r6, [r0], #4 - ldr r7, [r0], #4 - ldr r8, [r0], #4 - ldr r9, [r0], #4 - smulwt r1, r3, r6 - smulwt r12, r4, r6 - smulwt lr, r3, r8 - smulwt r10, r4, r8 - pkhbt r11, r8, r6, lsl #16 - pkhbt r1, lr, r1, lsl #16 - pkhbt r12, r10, r12, lsl #16 - pkhtb r6, r6, r8, asr #16 - uadd16 r6, r1, r6 - pkhbt lr, r9, r7, lsl #16 - uadd16 r10, r11, lr - usub16 lr, r11, lr - pkhtb r8, r7, r9, asr #16 - subs r5, r5, #1 - smulwt r1, r3, r8 - smulwb r7, r3, r8 - smulwt r11, r4, r8 - smulwb r9, r4, r8 - pkhbt r1, r7, r1, lsl #16 - uadd16 r8, r1, r8 - pkhbt r11, r9, r11, lsl #16 - usub16 r1, r12, r8 - uadd16 r8, r11, r6 - ldr r9, c0x00040004 - ldr r12, [sp, #40] - uadd16 r6, r10, r8 - usub16 r7, r10, r8 - uadd16 r7, r7, r9 - uadd16 r6, r6, r9 - uadd16 r10, r14, r1 - usub16 r1, r14, r1 - uadd16 r10, r10, r9 - uadd16 r1, r1, r9 - ldr r11, [r2], r12 - mov r8, r7, asr #3 - pkhtb r9, r8, r10, asr #19 - mov r8, r1, asr #3 - pkhtb r8, r8, r6, asr #19 - uxtb16 lr, r11, ror #8 - qadd16 r9, r9, lr - uxtb16 lr, r11 - qadd16 r8, r8, lr - usat16 r9, #8, r9 - usat16 r8, #8, r8 - orr r9, r8, r9, lsl #8 - ldr r11, [r2], r12 - ldr lr, [sp] - ldr r12, [sp, #44] - mov r7, r7, lsl #16 - mov r1, r1, lsl #16 - mov r10, r10, lsl #16 - mov r6, r6, lsl #16 - mov r7, r7, asr #3 - pkhtb r7, r7, r10, asr #19 - mov r1, r1, asr #3 - pkhtb r1, r1, r6, asr #19 - uxtb16 r8, r11, ror #8 - qadd16 r7, r7, r8 - uxtb16 r8, r11 - qadd16 r1, r1, r8 - usat16 r7, #8, r7 - usat16 r1, #8, r1 - orr r1, r1, r7, lsl #8 - 
str r9, [lr], r12 - str r1, [lr], r12 - str lr, [sp] - bne vp8_dequant_dc_idct_loop2_v6 - -; vpx_memset - sub r0, r0, #32 - add sp, sp, #4 - - mov r12, #0 - str r12, [r0] - str r12, [r0, #4] - str r12, [r0, #8] - str r12, [r0, #12] - str r12, [r0, #16] - str r12, [r0, #20] - str r12, [r0, #24] - str r12, [r0, #28] - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_dequant_dc_idct_add_v6| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x00004E7B -sinpi8sqrt2 DCD 0x00008A8C -c0x00040004 DCD 0x00040004 - - END diff --git a/vp9/decoder/arm/armv6/vp9_dequant_idct_v6.asm b/vp9/decoder/arm/armv6/vp9_dequant_idct_v6.asm deleted file mode 100644 index 47b671ca6..000000000 --- a/vp9/decoder/arm/armv6/vp9_dequant_idct_v6.asm +++ /dev/null @@ -1,196 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - EXPORT |vp8_dequant_idct_add_v6| - - AREA |.text|, CODE, READONLY -;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred, -; unsigned char *dest, int pitch, int stride) -; r0 = input -; r1 = dq -; r2 = pred -; r3 = dest -; sp + 36 = pitch ; +4 = 40 -; sp + 40 = stride ; +4 = 44 - - -|vp8_dequant_idct_add_v6| PROC - stmdb sp!, {r4-r11, lr} - - ldr r4, [r0] ;input - ldr r5, [r1], #4 ;dq - - sub sp, sp, #4 - str r3, [sp] - - mov r12, #4 - -vp8_dequant_add_loop - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - subs r12, r12, #1 - - ldrne r4, [r0, #4] - ldrne r5, [r1], #4 - - strh r6, [r0], #2 - strh r7, [r0], #2 - - bne vp8_dequant_add_loop - - sub r0, r0, #32 - mov r1, r0 - -; short_idct4x4llm_v6_dual - ldr r3, cospi8sqrt2minus1 - ldr r4, sinpi8sqrt2 - ldr r6, [r0, #8] - mov r5, #2 -vp8_dequant_idct_loop1_v6 - ldr r12, [r0, #24] - ldr r14, [r0, #16] - smulwt r9, r3, r6 - smulwb r7, r3, r6 - smulwt r10, r4, r6 - smulwb r8, r4, r6 - pkhbt r7, r7, r9, lsl #16 - smulwt r11, r3, r12 - pkhbt r8, r8, r10, lsl #16 - uadd16 r6, r6, r7 - smulwt r7, r4, r12 - smulwb r9, r3, r12 - smulwb r10, r4, r12 - subs r5, r5, #1 - pkhbt r9, r9, r11, lsl #16 - ldr r11, [r0], #4 - pkhbt r10, r10, r7, lsl #16 - uadd16 r7, r12, r9 - usub16 r7, r8, r7 - uadd16 r6, r6, r10 - uadd16 r10, r11, r14 - usub16 r8, r11, r14 - uadd16 r9, r10, r6 - usub16 r10, r10, r6 - uadd16 r6, r8, r7 - usub16 r7, r8, r7 - str r6, [r1, #8] - ldrne r6, [r0, #8] - str r7, [r1, #16] - str r10, [r1, #24] - str r9, [r1], #4 - bne vp8_dequant_idct_loop1_v6 - - mov r5, #2 - sub r0, r1, #8 -vp8_dequant_idct_loop2_v6 - ldr r6, [r0], #4 - ldr r7, [r0], #4 - ldr r8, [r0], #4 - ldr r9, [r0], #4 - smulwt r1, r3, r6 - smulwt r12, r4, r6 - smulwt lr, r3, r8 - smulwt r10, r4, r8 - pkhbt r11, r8, r6, lsl #16 - pkhbt r1, lr, r1, lsl #16 - pkhbt r12, r10, r12, lsl #16 - pkhtb r6, r6, r8, asr #16 - uadd16 r6, r1, r6 - pkhbt lr, r9, r7, lsl #16 - uadd16 r10, r11, lr - usub16 lr, r11, lr - pkhtb r8, r7, r9, asr #16 - subs r5, r5, #1 - smulwt r1, r3, r8 - smulwb r7, r3, r8 - smulwt r11, r4, r8 - smulwb r9, r4, r8 - pkhbt r1, r7, r1, lsl #16 - uadd16 r8, r1, r8 - pkhbt r11, r9, r11, lsl #16 - usub16 r1, r12, r8 - uadd16 r8, r11, r6 - ldr r9, c0x00040004 - ldr r12, [sp, #40] - uadd16 r6, r10, r8 - usub16 r7, r10, r8 - uadd16 r7, r7, r9 - uadd16 r6, r6, r9 - uadd16 
r10, r14, r1 - usub16 r1, r14, r1 - uadd16 r10, r10, r9 - uadd16 r1, r1, r9 - ldr r11, [r2], r12 - mov r8, r7, asr #3 - pkhtb r9, r8, r10, asr #19 - mov r8, r1, asr #3 - pkhtb r8, r8, r6, asr #19 - uxtb16 lr, r11, ror #8 - qadd16 r9, r9, lr - uxtb16 lr, r11 - qadd16 r8, r8, lr - usat16 r9, #8, r9 - usat16 r8, #8, r8 - orr r9, r8, r9, lsl #8 - ldr r11, [r2], r12 - ldr lr, [sp] - ldr r12, [sp, #44] - mov r7, r7, lsl #16 - mov r1, r1, lsl #16 - mov r10, r10, lsl #16 - mov r6, r6, lsl #16 - mov r7, r7, asr #3 - pkhtb r7, r7, r10, asr #19 - mov r1, r1, asr #3 - pkhtb r1, r1, r6, asr #19 - uxtb16 r8, r11, ror #8 - qadd16 r7, r7, r8 - uxtb16 r8, r11 - qadd16 r1, r1, r8 - usat16 r7, #8, r7 - usat16 r1, #8, r1 - orr r1, r1, r7, lsl #8 - str r9, [lr], r12 - str r1, [lr], r12 - str lr, [sp] - bne vp8_dequant_idct_loop2_v6 - -; vpx_memset - sub r0, r0, #32 - add sp, sp, #4 - - mov r12, #0 - str r12, [r0] - str r12, [r0, #4] - str r12, [r0, #8] - str r12, [r0, #12] - str r12, [r0, #16] - str r12, [r0, #20] - str r12, [r0, #24] - str r12, [r0, #28] - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_dequant_idct_add_v6| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x00004E7B -sinpi8sqrt2 DCD 0x00008A8C -c0x00040004 DCD 0x00040004 - - END diff --git a/vp9/decoder/arm/armv6/vp9_dequantize_v6.asm b/vp9/decoder/arm/armv6/vp9_dequantize_v6.asm deleted file mode 100644 index 72f7e0ee5..000000000 --- a/vp9/decoder/arm/armv6/vp9_dequantize_v6.asm +++ /dev/null @@ -1,69 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_dequantize_b_loop_v6| - - AREA |.text|, CODE, READONLY ; name this block of code -;------------------------------- -;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); -; r0 short *Q, -; r1 short *DQC -; r2 short *DQ -|vp8_dequantize_b_loop_v6| PROC - stmdb sp!, {r4-r9, lr} - - ldr r3, [r0] ;load Q - ldr r4, [r1] ;load DQC - ldr r5, [r0, #4] - ldr r6, [r1, #4] - - mov r12, #2 ;loop counter - -dequant_loop - smulbb r7, r3, r4 ;multiply - smultt r8, r3, r4 - smulbb r9, r5, r6 - smultt lr, r5, r6 - - ldr r3, [r0, #8] - ldr r4, [r1, #8] - ldr r5, [r0, #12] - ldr r6, [r1, #12] - - strh r7, [r2], #2 ;store result - smulbb r7, r3, r4 ;multiply - strh r8, [r2], #2 - smultt r8, r3, r4 - strh r9, [r2], #2 - smulbb r9, r5, r6 - strh lr, [r2], #2 - smultt lr, r5, r6 - - subs r12, r12, #1 - - add r0, r0, #16 - add r1, r1, #16 - - ldrne r3, [r0] - strh r7, [r2], #2 ;store result - ldrne r4, [r1] - strh r8, [r2], #2 - ldrne r5, [r0, #4] - strh r9, [r2], #2 - ldrne r6, [r1, #4] - strh lr, [r2], #2 - - bne dequant_loop - - ldmia sp!, {r4-r9, pc} - ENDP ;|vp8_dequantize_b_loop_v6| - - END diff --git a/vp9/decoder/arm/armv6/vp9_idct_blk_v6.c b/vp9/decoder/arm/armv6/vp9_idct_blk_v6.c deleted file mode 100644 index d4fa4b52f..000000000 --- a/vp9/decoder/arm/armv6/vp9_idct_blk_v6.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_ports/config.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_dequantize.h" - -void vp8_dequant_dc_idct_add_y_block_v6(short *q, short *dq, - unsigned char *pre, - unsigned char *dst, int stride, - unsigned short *eobs, short *dc) { - int i; - - for (i = 0; i < 4; i++) { - if (eobs[0] > 1) - vp8_dequant_dc_idct_add_v6(q, dq, pre, dst, 16, stride, dc[0]); - else - vp8_dc_only_idct_add_v6(dc[0], pre, dst, 16, stride); - - if (eobs[1] > 1) - vp8_dequant_dc_idct_add_v6(q + 16, dq, pre + 4, dst + 4, 16, stride, dc[1]); - else - vp8_dc_only_idct_add_v6(dc[1], pre + 4, dst + 4, 16, stride); - - if (eobs[2] > 1) - vp8_dequant_dc_idct_add_v6(q + 32, dq, pre + 8, dst + 8, 16, stride, dc[2]); - else - vp8_dc_only_idct_add_v6(dc[2], pre + 8, dst + 8, 16, stride); - - if (eobs[3] > 1) - vp8_dequant_dc_idct_add_v6(q + 48, dq, pre + 12, dst + 12, 16, stride, dc[3]); - else - vp8_dc_only_idct_add_v6(dc[3], pre + 12, dst + 12, 16, stride); - - q += 64; - dc += 4; - pre += 64; - dst += 4 * stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, unsigned char *pre, - unsigned char *dst, int stride, - unsigned short *eobs) { - int i; - - for (i = 0; i < 4; i++) { - if (eobs[0] > 1) - vp8_dequant_idct_add_v6(q, dq, pre, dst, 16, stride); - else { - vp8_dc_only_idct_add_v6(q[0]*dq[0], pre, dst, 16, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_v6(q + 16, dq, pre + 4, dst + 4, 16, stride); - else { - vp8_dc_only_idct_add_v6(q[16]*dq[0], pre + 4, dst + 4, 16, stride); - ((int *)(q + 16))[0] = 0; - } - - if (eobs[2] > 1) - vp8_dequant_idct_add_v6(q + 32, dq, pre + 8, dst + 8, 16, stride); - else { - vp8_dc_only_idct_add_v6(q[32]*dq[0], pre + 8, dst + 8, 16, stride); - ((int *)(q + 32))[0] = 0; - } - - if (eobs[3] > 1) - vp8_dequant_idct_add_v6(q + 48, dq, pre + 12, dst + 12, 16, stride); - else { - vp8_dc_only_idct_add_v6(q[48]*dq[0], pre + 12, dst + 12, 16, stride); - ((int *)(q + 48))[0] = 0; - } - - q += 64; - pre += 64; - dst += 4 * stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq, unsigned char *pre, - unsigned char *dstu, unsigned char *dstv, - int stride, unsigned short *eobs) { - int i; - - for (i = 0; i < 2; i++) { - if (eobs[0] > 1) - vp8_dequant_idct_add_v6(q, dq, pre, dstu, 8, stride); - else { - vp8_dc_only_idct_add_v6(q[0]*dq[0], pre, dstu, 8, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_v6(q + 16, dq, pre + 4, dstu + 4, 8, stride); - else { - vp8_dc_only_idct_add_v6(q[16]*dq[0], pre + 4, dstu + 4, 8, stride); - ((int *)(q + 16))[0] = 0; - } - - q += 32; - pre += 32; - dstu += 4 * stride; - eobs += 2; - } - - for (i = 0; i < 2; i++) { - if (eobs[0] > 1) - vp8_dequant_idct_add_v6(q, dq, pre, dstv, 8, stride); - else { - vp8_dc_only_idct_add_v6(q[0]*dq[0], pre, dstv, 8, stride); - ((int *)q)[0] = 0; - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_v6(q + 16, dq, pre + 4, dstv + 4, 8, stride); - else { - vp8_dc_only_idct_add_v6(q[16]*dq[0], pre + 4, dstv + 4, 8, stride); - ((int *)(q + 16))[0] = 0; - } - - q += 32; - pre += 32; - dstv += 4 * stride; - eobs += 2; - } -} diff --git a/vp9/decoder/arm/neon/vp9_dequant_idct_neon.asm b/vp9/decoder/arm/neon/vp9_dequant_idct_neon.asm deleted file mode 100644 index 4bf661857..000000000 --- a/vp9/decoder/arm/neon/vp9_dequant_idct_neon.asm +++ /dev/null @@ -1,129 +0,0 @@ -; -; Copyright (c) 
2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_dequant_idct_add_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_dequant_idct_neon(short *input, short *dq, unsigned char *pred, -; unsigned char *dest, int pitch, int stride) -; r0 short *input, -; r1 short *dq, -; r2 unsigned char *pred -; r3 unsigned char *dest -; sp int pitch -; sp+4 int stride - -|vp8_dequant_idct_add_neon| PROC - vld1.16 {q3, q4}, [r0] - vld1.16 {q5, q6}, [r1] - ldr r1, [sp] ; pitch - vld1.32 {d14[0]}, [r2], r1 - vld1.32 {d14[1]}, [r2], r1 - vld1.32 {d15[0]}, [r2], r1 - vld1.32 {d15[1]}, [r2] - - ldr r1, [sp, #4] ; stride - - adr r12, cospi8sqrt2minus1 ; pointer to the first constant - - vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon - vmul.i16 q2, q4, q6 - -;|short_idct4x4llm_neon| PROC - vld1.16 {d0}, [r12] - vswp d3, d4 ;q2(vp[4] vp[12]) - - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - -; memset(input, 0, 32) -- 32bytes - vmov.i16 q14, #0 - - vswp d3, d4 - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vmov q15, q14 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vst1.16 {q14, q15}, [r0] - - vrshr.s16 d2, d2, #3 - vrshr.s16 d3, d3, #3 - vrshr.s16 d4, d4, #3 - vrshr.s16 d5, d5, #3 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - - vaddw.u8 q1, q1, d14 - vaddw.u8 q2, q2, d15 - - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - - vst1.32 {d0[0]}, [r3], r1 - vst1.32 {d0[1]}, [r3], r1 - vst1.32 {d1[0]}, [r3], r1 - vst1.32 {d1[1]}, [r3] - - bx lr - - ENDP ; |vp8_dequant_idct_add_neon| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x4e7b4e7b -sinpi8sqrt2 DCD 0x8a8c8a8c - - END diff --git a/vp9/decoder/arm/neon/vp9_dequantizeb_neon.asm b/vp9/decoder/arm/neon/vp9_dequantizeb_neon.asm deleted file mode 100644 index c8e0c31f2..000000000 --- a/vp9/decoder/arm/neon/vp9_dequantizeb_neon.asm +++ /dev/null @@ -1,34 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
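For reference, the routine deleted here in two flavors (vp8_dequantize_b_loop_v6 above and vp8_dequantize_b_loop_neon just below) is an elementwise multiply of the 16 quantized coefficients of a 4x4 block by their dequantization factors. A plain-C sketch of the same loop:

/* C counterpart of vp8_dequantize_b_loop_{v6,neon}:
 * DQ[i] = Q[i] * DQC[i] for the 16 coefficients of a 4x4 block. */
void dequantize_b_loop_c(const short *Q, const short *DQC, short *DQ) {
  int i;
  for (i = 0; i < 16; ++i)
    DQ[i] = (short)(Q[i] * DQC[i]);
}

The dequant_idct_add routines above fuse this multiply with the inverse transform so the dequantized coefficients never round-trip through memory.
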
-; - - - EXPORT |vp8_dequantize_b_loop_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 short *Q, -; r1 short *DQC -; r2 short *DQ -|vp8_dequantize_b_loop_neon| PROC - vld1.16 {q0, q1}, [r0] - vld1.16 {q2, q3}, [r1] - - vmul.i16 q4, q0, q2 - vmul.i16 q5, q1, q3 - - vst1.16 {q4, q5}, [r2] - - bx lr - - ENDP - - END diff --git a/vp9/decoder/arm/neon/vp9_idct_blk_neon.c b/vp9/decoder/arm/neon/vp9_idct_blk_neon.c deleted file mode 100644 index f2620d972..000000000 --- a/vp9/decoder/arm/neon/vp9_idct_blk_neon.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_ports/config.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_dequantize.h" - -/* place these declarations here because we don't want to maintain them - * outside of this scope - */ -void idct_dequant_dc_full_2x_neon -(short *input, short *dq, unsigned char *pre, unsigned char *dst, - int stride, short *dc); -void idct_dequant_dc_0_2x_neon -(short *dc, unsigned char *pre, unsigned char *dst, int stride); -void idct_dequant_full_2x_neon -(short *q, short *dq, unsigned char *pre, unsigned char *dst, - int pitch, int stride); -void idct_dequant_0_2x_neon -(short *q, short dq, unsigned char *pre, int pitch, - unsigned char *dst, int stride); - -void vp8_dequant_dc_idct_add_y_block_neon(short *q, short *dq, - unsigned char *pre, - unsigned char *dst, int stride, - unsigned short *eobs, short *dc) { - int i; - - for (i = 0; i < 4; i++) { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_dc_full_2x_neon(q, dq, pre, dst, stride, dc); - else - idct_dequant_dc_0_2x_neon(dc, pre, dst, stride); - - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_dc_full_2x_neon(q + 32, dq, pre + 8, dst + 8, stride, dc + 2); - else - idct_dequant_dc_0_2x_neon(dc + 2, pre + 8, dst + 8, stride); - - q += 64; - dc += 4; - pre += 64; - dst += 4 * stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *pre, - unsigned char *dst, int stride, - unsigned short *eobs) { - int i; - - for (i = 0; i < 4; i++) { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, pre, dst, 16, stride); - else - idct_dequant_0_2x_neon(q, dq[0], pre, 16, dst, stride); - - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon(q + 32, dq, pre + 8, dst + 8, 16, stride); - else - idct_dequant_0_2x_neon(q + 32, dq[0], pre + 8, 16, dst + 8, stride); - - q += 64; - pre += 64; - dst += 4 * stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, - unsigned char *pre, - unsigned char *dstu, - unsigned char *dstv, int stride, - unsigned short *eobs) { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, pre, dstu, 8, stride); - else - idct_dequant_0_2x_neon(q, dq[0], pre, 8, dstu, stride); - - q += 32; - pre += 32; - dstu += 4 * stride; - - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, pre, dstu, 8, stride); - else - idct_dequant_0_2x_neon(q, dq[0], pre, 8, dstu, stride); - - q += 32; - pre += 32; - - if (((short *)eobs)[2] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, pre, dstv, 8, stride); - else - 
idct_dequant_0_2x_neon(q, dq[0], pre, 8, dstv, stride); - - q += 32; - pre += 32; - dstv += 4 * stride; - - if (((short *)eobs)[3] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, pre, dstv, 8, stride); - else - idct_dequant_0_2x_neon(q, dq[0], pre, 8, dstv, stride); -} diff --git a/vp9/decoder/arm/neon/vp9_idct_dequant_0_2x_neon.asm b/vp9/decoder/arm/neon/vp9_idct_dequant_0_2x_neon.asm deleted file mode 100644 index 456f8e1d4..000000000 --- a/vp9/decoder/arm/neon/vp9_idct_dequant_0_2x_neon.asm +++ /dev/null @@ -1,79 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |idct_dequant_0_2x_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *pre, -; int pitch, unsigned char *dst, int stride); -; r0 *q -; r1 dq -; r2 *pre -; r3 pitch -; sp *dst -; sp+4 stride -|idct_dequant_0_2x_neon| PROC - add r12, r2, #4 - vld1.32 {d2[0]}, [r2], r3 - vld1.32 {d2[1]}, [r2], r3 - vld1.32 {d4[0]}, [r2], r3 - vld1.32 {d4[1]}, [r2] - vld1.32 {d8[0]}, [r12], r3 - vld1.32 {d8[1]}, [r12], r3 - vld1.32 {d10[0]}, [r12], r3 - vld1.32 {d10[1]}, [r12] - - ldrh r12, [r0] ; lo q - ldrh r2, [r0, #32] ; hi q - mov r3, #0 - strh r3, [r0] - strh r3, [r0, #32] - - sxth r12, r12 ; lo - mul r0, r12, r1 - add r0, r0, #4 - asr r0, r0, #3 - vdup.16 q0, r0 - sxth r2, r2 ; hi - mul r0, r2, r1 - add r0, r0, #4 - asr r0, r0, #3 - vdup.16 q3, r0 - - vaddw.u8 q1, q0, d2 ; lo - vaddw.u8 q2, q0, d4 - vaddw.u8 q4, q3, d8 ; hi - vaddw.u8 q5, q3, d10 - - ldr r2, [sp] ; dst - ldr r3, [sp, #4] ; stride - - vqmovun.s16 d2, q1 ; lo - vqmovun.s16 d4, q2 - vqmovun.s16 d8, q4 ; hi - vqmovun.s16 d10, q5 - - add r0, r2, #4 - vst1.32 {d2[0]}, [r2], r3 ; lo - vst1.32 {d2[1]}, [r2], r3 - vst1.32 {d4[0]}, [r2], r3 - vst1.32 {d4[1]}, [r2] - vst1.32 {d8[0]}, [r0], r3 ; hi - vst1.32 {d8[1]}, [r0], r3 - vst1.32 {d10[0]}, [r0], r3 - vst1.32 {d10[1]}, [r0] - - bx lr - - ENDP ; |idct_dequant_0_2x_neon| - END diff --git a/vp9/decoder/arm/neon/vp9_idct_dequant_dc_0_2x_neon.asm b/vp9/decoder/arm/neon/vp9_idct_dequant_dc_0_2x_neon.asm deleted file mode 100644 index 0dc036acb..000000000 --- a/vp9/decoder/arm/neon/vp9_idct_dequant_dc_0_2x_neon.asm +++ /dev/null @@ -1,69 +0,0 @@ -; -; Copyright (c) 2010 The Webm project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
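The 0_2x routines above and below special-case blocks whose end-of-block index is at most 1: only the DC coefficient is coded, so the 4x4 inverse transform collapses to a single rounded value added uniformly to the prediction. (The 0xfefe masks in vp9_idct_blk_neon.c above are zero exactly when neither packed eob exceeds 1, which is what steers a pair of blocks onto this path.) A C sketch of one block of idct_dequant_0_2x_neon; the helper name is illustrative:

void idct_dequant_0_ref(short *q, short dq, const unsigned char *pre,
                        int pitch, unsigned char *dst, int stride) {
  /* dequantized DC through the IDCT's rounded >> 3, as in the asm */
  int dc = (q[0] * dq + 4) >> 3;
  int r, c;
  q[0] = 0;  /* the asm clears the coefficient after reading it */
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c) {
      int v = pre[r * pitch + c] + dc;
      dst[r * stride + c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
    }
}
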
-; - - - EXPORT |idct_dequant_dc_0_2x_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void idct_dequant_dc_0_2x_neon(short *dc, unsigned char *pre, -; unsigned char *dst, int stride); -; r0 *dc -; r1 *pre -; r2 *dst -; r3 stride -|idct_dequant_dc_0_2x_neon| PROC - ldr r0, [r0] ; *dc - mov r12, #16 - - vld1.32 {d2[0]}, [r1], r12 ; lo - vld1.32 {d2[1]}, [r1], r12 - vld1.32 {d4[0]}, [r1], r12 - vld1.32 {d4[1]}, [r1] - sub r1, r1, #44 - vld1.32 {d8[0]}, [r1], r12 ; hi - vld1.32 {d8[1]}, [r1], r12 - vld1.32 {d10[0]}, [r1], r12 - vld1.32 {d10[1]}, [r1] - - sxth r1, r0 ; lo *dc - add r1, r1, #4 - asr r1, r1, #3 - vdup.16 q0, r1 - sxth r0, r0, ror #16 ; hi *dc - add r0, r0, #4 - asr r0, r0, #3 - vdup.16 q3, r0 - - vaddw.u8 q1, q0, d2 ; lo - vaddw.u8 q2, q0, d4 - vaddw.u8 q4, q3, d8 ; hi - vaddw.u8 q5, q3, d10 - - vqmovun.s16 d2, q1 ; lo - vqmovun.s16 d4, q2 - vqmovun.s16 d8, q4 ; hi - vqmovun.s16 d10, q5 - - add r0, r2, #4 - vst1.32 {d2[0]}, [r2], r3 ; lo - vst1.32 {d2[1]}, [r2], r3 - vst1.32 {d4[0]}, [r2], r3 - vst1.32 {d4[1]}, [r2] - vst1.32 {d8[0]}, [r0], r3 ; hi - vst1.32 {d8[1]}, [r0], r3 - vst1.32 {d10[0]}, [r0], r3 - vst1.32 {d10[1]}, [r0] - - bx lr - - ENDP ;|idct_dequant_dc_0_2x_neon| - END diff --git a/vp9/decoder/arm/neon/vp9_idct_dequant_dc_full_2x_neon.asm b/vp9/decoder/arm/neon/vp9_idct_dequant_dc_full_2x_neon.asm deleted file mode 100644 index 61fa66075..000000000 --- a/vp9/decoder/arm/neon/vp9_idct_dequant_dc_full_2x_neon.asm +++ /dev/null @@ -1,205 +0,0 @@ -; -; Copyright (c) 2010 The Webm project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |idct_dequant_dc_full_2x_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void idct_dequant_dc_full_2x_neon(short *q, short *dq, unsigned char *pre, -; unsigned char *dst, int stride, short *dc); -; r0 *q, -; r1 *dq, -; r2 *pre -; r3 *dst -; sp stride -; sp+4 *dc -|idct_dequant_dc_full_2x_neon| PROC - vld1.16 {q0, q1}, [r1] ; dq (same l/r) - vld1.16 {q2, q3}, [r0] ; l q - mov r1, #16 ; pitch - add r0, r0, #32 - vld1.16 {q4, q5}, [r0] ; r q - add r12, r2, #4 - ; interleave the predictors - vld1.32 {d28[0]}, [r2], r1 ; l pre - vld1.32 {d28[1]}, [r12], r1 ; r pre - vld1.32 {d29[0]}, [r2], r1 - vld1.32 {d29[1]}, [r12], r1 - vld1.32 {d30[0]}, [r2], r1 - vld1.32 {d30[1]}, [r12], r1 - vld1.32 {d31[0]}, [r2] - ldr r1, [sp, #4] - vld1.32 {d31[1]}, [r12] - - adr r2, cospi8sqrt2minus1 ; pointer to the first constant - - ldrh r12, [r1], #2 ; lo *dc - ldrh r1, [r1] ; hi *dc - - ; dequant: q[i] = q[i] * dq[i] - vmul.i16 q2, q2, q0 - vmul.i16 q3, q3, q1 - vmul.i16 q4, q4, q0 - vmul.i16 q5, q5, q1 - - ; move dc up to neon and overwrite first element - vmov.16 d4[0], r12 - vmov.16 d8[0], r1 - - vld1.16 {d0}, [r2] - - ; q2: l0r0 q3: l8r8 - ; q4: l4r4 q5: l12r12 - vswp d5, d8 - vswp d7, d10 - - ; _CONSTANTS_ * 4,12 >> 16 - ; q6: 4 * sinpi : c1/temp1 - ; q7: 12 * sinpi : d1/temp2 - ; q8: 4 * cospi - ; q9: 12 * cospi - vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 - vqdmulh.s16 q7, q5, d0[2] - vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 - vqdmulh.s16 q9, q5, d0[0] - - vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 - vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 - - ; vqdmulh only accepts signed values. 
this was a problem because - ; our constant had the high bit set, and was treated as a negative value. - ; vqdmulh also doubles the value before it shifts by 16. we need to - ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0, - ; so we can shift the constant without losing precision. this avoids - ; shift again afterward, but also avoids the sign issue. win win! - ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we - ; pre-shift it - vshr.s16 q8, q8, #1 - vshr.s16 q9, q9, #1 - - ; q4: 4 + 4 * cospi : d1/temp1 - ; q5: 12 + 12 * cospi : c1/temp2 - vqadd.s16 q4, q4, q8 - vqadd.s16 q5, q5, q9 - - ; c1 = temp1 - temp2 - ; d1 = temp1 + temp2 - vqsub.s16 q2, q6, q5 - vqadd.s16 q3, q4, q7 - - ; [0]: a1+d1 - ; [1]: b1+c1 - ; [2]: b1-c1 - ; [3]: a1-d1 - vqadd.s16 q4, q10, q3 - vqadd.s16 q5, q11, q2 - vqsub.s16 q6, q11, q2 - vqsub.s16 q7, q10, q3 - - ; rotate - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - ; idct loop 2 - ; q4: l 0, 4, 8,12 r 0, 4, 8,12 - ; q5: l 1, 5, 9,13 r 1, 5, 9,13 - ; q6: l 2, 6,10,14 r 2, 6,10,14 - ; q7: l 3, 7,11,15 r 3, 7,11,15 - - ; q8: 1 * sinpi : c1/temp1 - ; q9: 3 * sinpi : d1/temp2 - ; q10: 1 * cospi - ; q11: 3 * cospi - vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 - vqdmulh.s16 q9, q7, d0[2] - vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 - vqdmulh.s16 q11, q7, d0[0] - - vqadd.s16 q2, q4, q6 ; a1 = 0 + 2 - vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 - - ; see note on shifting above - vshr.s16 q10, q10, #1 - vshr.s16 q11, q11, #1 - - ; q10: 1 + 1 * cospi : d1/temp1 - ; q11: 3 + 3 * cospi : c1/temp2 - vqadd.s16 q10, q5, q10 - vqadd.s16 q11, q7, q11 - - ; q8: c1 = temp1 - temp2 - ; q9: d1 = temp1 + temp2 - vqsub.s16 q8, q8, q11 - vqadd.s16 q9, q10, q9 - - ; a1+d1 - ; b1+c1 - ; b1-c1 - ; a1-d1 - vqadd.s16 q4, q2, q9 - vqadd.s16 q5, q3, q8 - vqsub.s16 q6, q3, q8 - vqsub.s16 q7, q2, q9 - - ; +4 >> 3 (rounding) - vrshr.s16 q4, q4, #3 ; lo - vrshr.s16 q5, q5, #3 - vrshr.s16 q6, q6, #3 ; hi - vrshr.s16 q7, q7, #3 - - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - ; adding pre - ; input is still packed. pre was read interleaved - vaddw.u8 q4, q4, d28 - vaddw.u8 q5, q5, d29 - vaddw.u8 q6, q6, d30 - vaddw.u8 q7, q7, d31 - - vmov.i16 q14, #0 - vmov q15, q14 - vst1.16 {q14, q15}, [r0] ; write over high input - sub r0, r0, #32 - vst1.16 {q14, q15}, [r0] ; write over low input - - ;saturate and narrow - vqmovun.s16 d0, q4 ; lo - vqmovun.s16 d1, q5 - vqmovun.s16 d2, q6 ; hi - vqmovun.s16 d3, q7 - - ldr r1, [sp] ; stride - add r2, r3, #4 ; hi - vst1.32 {d0[0]}, [r3], r1 ; lo - vst1.32 {d0[1]}, [r2], r1 ; hi - vst1.32 {d1[0]}, [r3], r1 - vst1.32 {d1[1]}, [r2], r1 - vst1.32 {d2[0]}, [r3], r1 - vst1.32 {d2[1]}, [r2], r1 - vst1.32 {d3[0]}, [r3] - vst1.32 {d3[1]}, [r2] - - bx lr - - ENDP ; |idct_dequant_dc_full_2x_neon| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x4e7b -; because the lowest bit in 0x8a8c is 0, we can pre-shift this -sinpi8sqrt2 DCD 0x4546 - - END diff --git a/vp9/decoder/arm/neon/vp9_idct_dequant_full_2x_neon.asm b/vp9/decoder/arm/neon/vp9_idct_dequant_full_2x_neon.asm deleted file mode 100644 index 772ec4685..000000000 --- a/vp9/decoder/arm/neon/vp9_idct_dequant_full_2x_neon.asm +++ /dev/null @@ -1,197 +0,0 @@ -; -; Copyright (c) 2010 The Webm project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. 
All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |idct_dequant_full_2x_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *pre, -; unsigned char *dst, int pitch, int stride); -; r0 *q, -; r1 *dq, -; r2 *pre -; r3 *dst -; sp pitch -; sp+4 stride -|idct_dequant_full_2x_neon| PROC - vld1.16 {q0, q1}, [r1] ; dq (same l/r) - vld1.16 {q2, q3}, [r0] ; l q - ldr r1, [sp] ; pitch - add r0, r0, #32 - vld1.16 {q4, q5}, [r0] ; r q - add r12, r2, #4 - ; interleave the predictors - vld1.32 {d28[0]}, [r2], r1 ; l pre - vld1.32 {d28[1]}, [r12], r1 ; r pre - vld1.32 {d29[0]}, [r2], r1 - vld1.32 {d29[1]}, [r12], r1 - vld1.32 {d30[0]}, [r2], r1 - vld1.32 {d30[1]}, [r12], r1 - vld1.32 {d31[0]}, [r2] - vld1.32 {d31[1]}, [r12] - - adr r2, cospi8sqrt2minus1 ; pointer to the first constant - - ; dequant: q[i] = q[i] * dq[i] - vmul.i16 q2, q2, q0 - vmul.i16 q3, q3, q1 - vmul.i16 q4, q4, q0 - vmul.i16 q5, q5, q1 - - vld1.16 {d0}, [r2] - - ; q2: l0r0 q3: l8r8 - ; q4: l4r4 q5: l12r12 - vswp d5, d8 - vswp d7, d10 - - ; _CONSTANTS_ * 4,12 >> 16 - ; q6: 4 * sinpi : c1/temp1 - ; q7: 12 * sinpi : d1/temp2 - ; q8: 4 * cospi - ; q9: 12 * cospi - vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 - vqdmulh.s16 q7, q5, d0[2] - vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 - vqdmulh.s16 q9, q5, d0[0] - - vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 - vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 - - ; vqdmulh only accepts signed values. this was a problem because - ; our constant had the high bit set, and was treated as a negative value. - ; vqdmulh also doubles the value before it shifts by 16. we need to - ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0, - ; so we can shift the constant without losing precision. this avoids - ; shift again afterward, but also avoids the sign issue. win win! 
- ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we - ; pre-shift it - vshr.s16 q8, q8, #1 - vshr.s16 q9, q9, #1 - - ; q4: 4 + 4 * cospi : d1/temp1 - ; q5: 12 + 12 * cospi : c1/temp2 - vqadd.s16 q4, q4, q8 - vqadd.s16 q5, q5, q9 - - ; c1 = temp1 - temp2 - ; d1 = temp1 + temp2 - vqsub.s16 q2, q6, q5 - vqadd.s16 q3, q4, q7 - - ; [0]: a1+d1 - ; [1]: b1+c1 - ; [2]: b1-c1 - ; [3]: a1-d1 - vqadd.s16 q4, q10, q3 - vqadd.s16 q5, q11, q2 - vqsub.s16 q6, q11, q2 - vqsub.s16 q7, q10, q3 - - ; rotate - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - ; idct loop 2 - ; q4: l 0, 4, 8,12 r 0, 4, 8,12 - ; q5: l 1, 5, 9,13 r 1, 5, 9,13 - ; q6: l 2, 6,10,14 r 2, 6,10,14 - ; q7: l 3, 7,11,15 r 3, 7,11,15 - - ; q8: 1 * sinpi : c1/temp1 - ; q9: 3 * sinpi : d1/temp2 - ; q10: 1 * cospi - ; q11: 3 * cospi - vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 - vqdmulh.s16 q9, q7, d0[2] - vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 - vqdmulh.s16 q11, q7, d0[0] - - vqadd.s16 q2, q4, q6 ; a1 = 0 + 2 - vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 - - ; see note on shifting above - vshr.s16 q10, q10, #1 - vshr.s16 q11, q11, #1 - - ; q10: 1 + 1 * cospi : d1/temp1 - ; q11: 3 + 3 * cospi : c1/temp2 - vqadd.s16 q10, q5, q10 - vqadd.s16 q11, q7, q11 - - ; q8: c1 = temp1 - temp2 - ; q9: d1 = temp1 + temp2 - vqsub.s16 q8, q8, q11 - vqadd.s16 q9, q10, q9 - - ; a1+d1 - ; b1+c1 - ; b1-c1 - ; a1-d1 - vqadd.s16 q4, q2, q9 - vqadd.s16 q5, q3, q8 - vqsub.s16 q6, q3, q8 - vqsub.s16 q7, q2, q9 - - ; +4 >> 3 (rounding) - vrshr.s16 q4, q4, #3 ; lo - vrshr.s16 q5, q5, #3 - vrshr.s16 q6, q6, #3 ; hi - vrshr.s16 q7, q7, #3 - - vtrn.32 q4, q6 - vtrn.32 q5, q7 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - ; adding pre - ; input is still packed. pre was read interleaved - vaddw.u8 q4, q4, d28 - vaddw.u8 q5, q5, d29 - vaddw.u8 q6, q6, d30 - vaddw.u8 q7, q7, d31 - - vmov.i16 q14, #0 - vmov q15, q14 - vst1.16 {q14, q15}, [r0] ; write over high input - sub r0, r0, #32 - vst1.16 {q14, q15}, [r0] ; write over low input - - ;saturate and narrow - vqmovun.s16 d0, q4 ; lo - vqmovun.s16 d1, q5 - vqmovun.s16 d2, q6 ; hi - vqmovun.s16 d3, q7 - - ldr r1, [sp, #4] ; stride - add r2, r3, #4 ; hi - vst1.32 {d0[0]}, [r3], r1 ; lo - vst1.32 {d0[1]}, [r2], r1 ; hi - vst1.32 {d1[0]}, [r3], r1 - vst1.32 {d1[1]}, [r2], r1 - vst1.32 {d2[0]}, [r3], r1 - vst1.32 {d2[1]}, [r2], r1 - vst1.32 {d3[0]}, [r3] - vst1.32 {d3[1]}, [r2] - - bx lr - - ENDP ; |idct_dequant_full_2x_neon| - -; Constant Pool -cospi8sqrt2minus1 DCD 0x4e7b -; because the lowest bit in 0x8a8c is 0, we can pre-shift this -sinpi8sqrt2 DCD 0x4546 - - END diff --git a/vp9/decoder/arm/vp9_dequantize_arm.c b/vp9/decoder/arm/vp9_dequantize_arm.c deleted file mode 100644 index ff1eec640..000000000 --- a/vp9/decoder/arm/vp9_dequantize_arm.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
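The fixed-point note repeated in the two files above is worth a concrete check. vqdmulh.s16 returns (2*a*b) >> 16 on signed lanes, which is why sinpi8sqrt2 is stored pre-shifted (0x8a8c would read as negative, and its low bit is 0 so halving it is lossless), while cospi8sqrt2minus1 (0x4e7b, low bit 1) is used as-is and the result is shifted right once afterwards. A small self-contained C check, ignoring saturation:

#include <stdio.h>

/* vqdmulh.s16 on one lane: (2*a*b) >> 16, saturation omitted */
static int vqdmulh_s16(short a, short b) {
  return (a * b * 2) >> 16;
}

int main(void) {
  short x = 1234;  /* arbitrary sample coefficient */
  /* the doubling inside vqdmulh undoes the 0x8a8c -> 0x4546 pre-shift */
  printf("%d %d\n", (x * 0x8a8c) >> 16, vqdmulh_s16(x, 0x4546));
  /* 0x4e7b cannot be pre-shifted losslessly; shift the result instead */
  printf("%d %d\n", (x * 0x4e7b) >> 16, vqdmulh_s16(x, 0x4e7b) >> 1);
  return 0;  /* each line prints a matching pair */
}
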
- */ - - -#include "vpx_ports/config.h" -#include "vp9/decoder/vp9_dequantize.h" -#include "vp9/common/vp9_blockd.h" -#include "vpx_mem/vpx_mem.h" - -#if HAVE_ARMV7 -extern void vp9_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ); -#endif - -#if HAVE_ARMV6 -extern void vp9_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ); -#endif - -#if HAVE_ARMV7 - -void vp9_dequantize_b_neon(BLOCKD *d) { - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; - - vp9_dequantize_b_loop_neon(Q, DQC, DQ); -} -#endif - -#if HAVE_ARMV6 -void vp9_dequantize_b_v6(BLOCKD *d) { - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; - - vp9_dequantize_b_loop_v6(Q, DQC, DQ); -} -#endif diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index e92712bd6..e01910d53 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -28,9 +28,6 @@ #include "vpx_ports/vpx_timer.h" #include "vp9/decoder/vp9_decodframe.h" #include "vp9/decoder/vp9_detokenize.h" -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif static int get_free_fb(VP9_COMMON *cm); static void ref_cnt_fb(int *buf, int *idx, int new_idx); @@ -235,11 +232,6 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, return pbi->common.error.error_code; } -/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/ -#if HAVE_ARMV7 -extern void vp9_push_neon(int64_t *store); -extern void vp9_pop_neon(int64_t *store); -#endif static int get_free_fb(VP9_COMMON *cm) { int i; @@ -317,9 +309,6 @@ static int swap_frame_buffers(VP9_COMMON *cm) { int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, const unsigned char **psource, int64_t time_stamp) { -#if HAVE_ARMV7 - int64_t dx_store_reg[8]; -#endif VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; const unsigned char *source = *psource; @@ -346,26 +335,9 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; } -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_push_neon(dx_store_reg); - } -#endif - cm->new_fb_idx = get_free_fb(cm); if (setjmp(pbi->common.error.jmp)) { -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(dx_store_reg); - } -#endif pbi->common.error.setjmp = 0; /* We do not know if the missing frame(s) was supposed to update @@ -384,14 +356,6 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, retcode = vp9_decode_frame(pbi, psource); if (retcode < 0) { -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(dx_store_reg); - } -#endif pbi->common.error.error_code = VPX_CODEC_ERROR; pbi->common.error.setjmp = 0; if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) @@ -401,14 +365,6 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, { if (swap_frame_buffers(cm)) { -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(dx_store_reg); - } -#endif pbi->common.error.error_code = VPX_CODEC_ERROR; pbi->common.error.setjmp = 0; return -1; @@ -455,14 +411,6 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, pbi->last_time_stamp = time_stamp; pbi->source_sz = 0; -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(dx_store_reg); - } -#endif pbi->common.error.setjmp = 0; return retcode; } diff --git 
a/vp9/encoder/arm/armv5te/vp9_boolhuff_armv5te.asm b/vp9/encoder/arm/armv5te/vp9_boolhuff_armv5te.asm deleted file mode 100644 index 94e65ef8d..000000000 --- a/vp9/encoder/arm/armv5te/vp9_boolhuff_armv5te.asm +++ /dev/null @@ -1,286 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_start_encode| - EXPORT |vp9_encode_bool| - EXPORT |vp8_stop_encode| - EXPORT |vp8_encode_value| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 BOOL_CODER *br -; r1 unsigned char *source - -|vp8_start_encode| PROC - mov r12, #0 - mov r3, #255 - mvn r2, #23 - str r12, [r0, #vp9_writer_lowvalue] - str r3, [r0, #vp9_writer_range] - str r12, [r0, #vp9_writer_value] - str r2, [r0, #vp9_writer_count] - str r12, [r0, #vp9_writer_pos] - str r1, [r0, #vp9_writer_buffer] - bx lr - ENDP - -; r0 BOOL_CODER *br -; r1 int bit -; r2 int probability -|vp9_encode_bool| PROC - push {r4-r9, lr} - - mov r4, r2 - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - - sub r7, r5, #1 ; range-1 - - cmp r1, #0 - mul r6, r4, r7 ; ((range-1) * probability) - - mov r7, #1 - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8) - - addne r2, r2, r4 ; if (bit) lowvalue += split - subne r4, r5, r4 ; if (bit) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r1, [r7, r4] - cmpge r1, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r1, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r1, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - pop {r4-r9, pc} - ENDP - -; r0 BOOL_CODER *br -|vp8_stop_encode| PROC - push {r4-r10, lr} - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - - mov r10, #32 - -stop_encode_loop - sub r7, r5, #1 ; range-1 - - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_se ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_se - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_se -token_zero_while_loop_se - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_se - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r1, [r7, r4] - cmpge r1, #0xff - beq token_zero_while_loop_se - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_se - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r1, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r1, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_se - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r10, r10, #1 - bne stop_encode_loop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - pop {r4-r10, pc} - - ENDP - -; r0 BOOL_CODER *br -; r1 int data -; r2 int bits -|vp8_encode_value| PROC - push {r4-r11, lr} - - mov r10, r2 - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - - rsb r4, r10, #32 ; 32-n - - ; v is kept in r1 during the token pack loop - lsl r1, r1, r4 ; r1 = v << 32 - n - -encode_value_loop - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r1, r1, #1 ; bit = v >> n - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - addcs r2, r2, r4 ; if (bit) lowvalue += split - subcs r4, r5, r4 ; if (bit) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_ev ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_ev - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_ev -token_zero_while_loop_ev - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_ev - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop_ev - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_ev - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_ev - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r10, r10, #1 - bne encode_value_loop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - pop {r4-r11, pc} - ENDP - - END diff --git a/vp9/encoder/arm/armv5te/vp9_packtokens_armv5.asm b/vp9/encoder/arm/armv5te/vp9_packtokens_armv5.asm deleted file mode 100644 index 9ccbaa6c1..000000000 --- a/vp9/encoder/arm/armv5te/vp9_packtokens_armv5.asm +++ /dev/null @@ -1,291 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vp8cx_pack_tokens_armv5| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 vp9_writer *w -; r1 const TOKENEXTRA *p -; r2 int xcount -; r3 vp8_coef_encodings -; s0 vp8_extra_bits -; s1 vp8_coef_tree -|vp8cx_pack_tokens_armv5| PROC - push {r4-r11, lr} - - ; Add size of xcount * sizeof (TOKENEXTRA) to get stop - ; sizeof (TOKENEXTRA) is 8 - sub sp, sp, #12 - add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA) - str r2, [sp, #0] - str r3, [sp, #8] ; save vp8_coef_encodings - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - b check_p_lt_stop - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #8] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp9_token_value] ; v - ldr r8, [r4, #vp9_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #52] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. 
So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #52] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #48] ; vp8_extra_bits - ; Add t * sizeof (vp9_extra_bit_struct) to get the desired - ; element. Here vp9_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp9_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp9_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp9_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp9_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp9_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp9_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp9_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp9_writer_buffer] - lsr r6, r2, #24 ; lowvalue 
>> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #0x10] - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - add sp, sp, #12 - pop {r4-r11, pc} - ENDP - - END diff --git a/vp9/encoder/arm/armv5te/vp9_packtokens_mbrow_armv5.asm b/vp9/encoder/arm/armv5te/vp9_packtokens_mbrow_armv5.asm deleted file mode 100644 index 0938ce1a3..000000000 --- a/vp9/encoder/arm/armv5te/vp9_packtokens_mbrow_armv5.asm +++ /dev/null @@ -1,327 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_mb_row_tokens_armv5| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 VP8_COMP *cpi -; r1 vp9_writer *w -; r2 vp8_coef_encodings -; r3 vp8_extra_bits -; s0 vp8_coef_tree - -|vp8cx_pack_mb_row_tokens_armv5| PROC - push {r4-r11, lr} - sub sp, sp, #24 - - ; Compute address of cpi->common.mb_rows - ldr r4, _VP8_COMP_common_ - ldr r6, _VP8_COMMON_MBrows_ - add r4, r0, r4 - - ldr r5, [r4, r6] ; load up mb_rows - - str r2, [sp, #20] ; save vp8_coef_encodings - str r5, [sp, #12] ; save mb_rows - str r3, [sp, #8] ; save vp8_extra_bits - - ldr r4, _VP8_COMP_tplist_ - add r4, r0, r4 - ldr r7, [r4, #0] ; dereference cpi->tp_list - - mov r0, r1 ; keep same as other loops - - ldr r2, [r0, #vp9_writer_lowvalue] - ldr r5, [r0, #vp9_writer_range] - ldr r3, [r0, #vp9_writer_count] - -mb_row_loop - - ldr r1, [r7, #tokenlist_start] - ldr r9, [r7, #tokenlist_stop] - str r9, [sp, #0] ; save stop for later comparison - str r7, [sp, #16] ; tokenlist address for next time - - b check_p_lt_stop - - ; actuall work gets done here! - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #20] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp9_token_value] ; v - ldr r8, [r4, #vp9_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #60] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. 
- clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #60] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #8] ; vp8_extra_bits - ; Add t * sizeof (vp9_extra_bit_struct) to get the desired - ; element. Here vp9_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp9_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp9_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp9_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp9_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - 
lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp9_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp9_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp9_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp9_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #0x10] - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - ldr r6, [sp, #12] ; mb_rows - ldr r7, [sp, #16] ; tokenlist address - subs r6, r6, #1 - add r7, r7, #TOKENLIST_SZ ; next element in the array - str r6, [sp, #12] - bne mb_row_loop - - str r2, [r0, #vp9_writer_lowvalue] - str r5, [r0, #vp9_writer_range] - str r3, [r0, #vp9_writer_count] - add sp, sp, #24 - pop {r4-r11, pc} - ENDP - -_VP8_COMP_common_ - DCD vp8_comp_common -_VP8_COMMON_MBrows_ - DCD vp8_common_mb_rows -_VP8_COMP_tplist_ - DCD vp8_comp_tplist - - END diff --git a/vp9/encoder/arm/armv5te/vp9_packtokens_partitions_armv5.asm b/vp9/encoder/arm/armv5te/vp9_packtokens_partitions_armv5.asm deleted file mode 100644 index 4611b407d..000000000 --- a/vp9/encoder/arm/armv5te/vp9_packtokens_partitions_armv5.asm +++ /dev/null @@ -1,465 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
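All three ARMv5 packers removed here, vp8cx_pack_tokens_armv5 and vp8cx_pack_mb_row_tokens_armv5 above plus the partition packer that follows, share the same token_loop. In terms of the encode_bool() sketch above it reduces to roughly this (types and parameter names are stand-ins for the real tables):

    typedef signed char vp9_tree_index;

    static void write_token(vp9_writer *w, const vp9_tree_index *tree,
                            const unsigned char *pp, int v, int n,
                            int skip_eob_node) {
      vp9_tree_index i = 0;

      /* "vp8 specific skip_eob_node": start the walk past the EOB
         decision and emit one bit fewer (movne lr, #2 / subne r8, r8, #1) */
      if (skip_eob_node) {
        i = 2;
        --n;
      }

      /* the asm keeps v pre-shifted to the top of r12 (v << (32 - n))
         and peels one bit off per lsls; this is the same walk, MSB first */
      do {
        const int bb = (v >> --n) & 1;
        encode_bool(w, bb, pp[i >> 1]);
        i = tree[i + bb];            /* ldrsb lr, [r10, lr] */
      } while (n);
    }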
-; - - - EXPORT |vp8cx_pack_tokens_into_partitions_armv5| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - -; r0 VP8_COMP *cpi -; r1 unsigned char *cx_data -; r2 int num_part -; r3 *size -; s0 vp8_coef_encodings -; s1 vp8_extra_bits, -; s2 const vp9_tree_index *, - -|vp8cx_pack_tokens_into_partitions_armv5| PROC - push {r4-r11, lr} - sub sp, sp, #44 - - ; Compute address of cpi->common.mb_rows - ldr r4, _VP8_COMP_common_ - ldr r6, _VP8_COMMON_MBrows_ - add r4, r0, r4 - - ldr r5, [r4, r6] ; load up mb_rows - - str r5, [sp, #36] ; save mb_rows - str r1, [sp, #24] ; save cx_data - str r2, [sp, #20] ; save num_part - str r3, [sp, #8] ; save *size - - ; *size = 3*(num_part -1 ); - sub r2, r2, #1 ; num_part - 1 - add r2, r2, r2, lsl #1 ; 3*(num_part - 1) - str r2, [r3] - - add r2, r2, r1 ; cx_data + *size - str r2, [sp, #40] ; ptr - - ldr r4, _VP8_COMP_tplist_ - add r4, r0, r4 - ldr r7, [r4, #0] ; dereference cpi->tp_list - str r7, [sp, #32] ; store start of cpi->tp_list - - ldr r11, _VP8_COMP_bc2_ ; load up vp9_writer out of cpi - add r0, r0, r11 - - mov r11, #0 - str r11, [sp, #28] ; i - -numparts_loop - ldr r10, [sp, #40] ; ptr - ldr r5, [sp, #36] ; move mb_rows to the counting section - sub r5, r5, r11 ; move start point with each partition - ; mb_rows starts at i - str r5, [sp, #12] - - ; Reset all of the VP8 Writer data for each partition that - ; is processed. - ; start_encode - mov r2, #0 ; vp9_writer_lowvalue - mov r5, #255 ; vp9_writer_range - mvn r3, #23 ; vp9_writer_count - - str r2, [r0, #vp9_writer_value] - str r2, [r0, #vp9_writer_pos] - str r10, [r0, #vp9_writer_buffer] - -mb_row_loop - - ldr r1, [r7, #tokenlist_start] - ldr r9, [r7, #tokenlist_stop] - str r9, [sp, #0] ; save stop for later comparison - str r7, [sp, #16] ; tokenlist address for next time - - b check_p_lt_stop - - ; actual work gets done here! - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #80] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp9_token_value] ; v - ldr r8, [r4, #vp9_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #88] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #88] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #84] ; vp8_extra_bits - ; Add t * sizeof (vp9_extra_bit_struct) to get the desired - ; element. Here vp9_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp9_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp9_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp9_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp9_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] - lsl 
r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp9_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp9_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp9_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp9_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp9_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #0x10] - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - ldr r10, [sp, #20] ; num_parts - mov r1, #TOKENLIST_SZ - mul r1, r10, r1 - - ldr r6, [sp, #12] ; mb_rows - ldr r7, [sp, #16] ; tokenlist address - subs r6, r6, r10 - add r7, r7, r1 ; next element in the array - str r6, [sp, #12] - bgt mb_row_loop - - mov r12, #32 - -stop_encode_loop - sub r7, r5, #1 ; range-1 - - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. 
This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_se ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_se - - ldr r4, [r0, #vp9_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_se -token_zero_while_loop_se - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_se - cmp r4, #0 - ldrge r7, [r0, #vp9_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop_se - - ldr r7, [r0, #vp9_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_se - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp9_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp9_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp9_writer_pos] - sub r3, r3, #8 ; count -= 8 - strb r7, [r10, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_se - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r12, r12, #1 - bne stop_encode_loop - - ldr r10, [sp, #8] ; *size - ldr r11, [r10] - ldr r4, [r0, #vp9_writer_pos] ; w->pos - add r11, r11, r4 ; *size += w->pos - str r11, [r10] - - ldr r9, [sp, #20] ; num_parts - sub r9, r9, #1 - ldr r10, [sp, #28] ; i - cmp r10, r9 ; if(i<(num_part - 1)) - bge skip_write_partition - - ldr r12, [sp, #40] ; ptr - add r12, r12, r4 ; ptr += w->pos - str r12, [sp, #40] - - ldr r9, [sp, #24] ; cx_data - mov r8, r4, asr #8 - strb r4, [r9, #0] - strb r8, [r9, #1] - mov r4, r4, asr #16 - strb r4, [r9, #2] - - add r9, r9, #3 ; cx_data += 3 - str r9, [sp, #24] - -skip_write_partition - - ldr r11, [sp, #28] ; i - ldr r10, [sp, #20] ; num_parts - - add r11, r11, #1 ; i++ - str r11, [sp, #28] - - ldr r7, [sp, #32] ; cpi->tp_list[i] - mov r1, #TOKENLIST_SZ - add r7, r7, r1 ; next element in cpi->tp_list - str r7, [sp, #32] ; cpi->tp_list[i+1] - - cmp r10, r11 - bgt numparts_loop - - - add sp, sp, #44 - pop {r4-r11, pc} - ENDP - -_VP8_COMP_common_ - DCD vp8_comp_common -_VP8_COMMON_MBrows_ - DCD vp8_common_mb_rows -_VP8_COMP_tplist_ - DCD vp8_comp_tplist -_VP8_COMP_bc2_ - DCD vp8_comp_bc2 - - END diff --git a/vp9/encoder/arm/armv6/vp9_fast_quantize_b_armv6.asm b/vp9/encoder/arm/armv6/vp9_fast_quantize_b_armv6.asm deleted file mode 100644 index 4f75ef5e7..000000000 --- a/vp9/encoder/arm/armv6/vp9_fast_quantize_b_armv6.asm +++ /dev/null @@ -1,223 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
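The three strb instructions just above record each partition's byte count (w->pos) as a 3-byte little-endian value inside the header block that "*size = 3*(num_part - 1)" reserved at the front of cx_data. A C equivalent, with an invented helper name:

    static void write_partition_size(unsigned char *cx_data, unsigned int len) {
      cx_data[0] = (unsigned char)(len & 0xff);          /* strb r4, [r9, #0] */
      cx_data[1] = (unsigned char)((len >> 8) & 0xff);   /* strb r8, [r9, #1] */
      cx_data[2] = (unsigned char)((len >> 16) & 0xff);  /* strb r4, [r9, #2] */
    }

Only the first num_part - 1 partitions get an entry (the "if(i<(num_part - 1))" test skips the write for the last one), so the final partition's length is implied by the total.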
-; - - - EXPORT |vp8_fast_quantize_b_armv6| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 BLOCK *b -; r1 BLOCKD *d -|vp8_fast_quantize_b_armv6| PROC - stmfd sp!, {r1, r4-r11, lr} - - ldr r3, [r0, #vp8_block_coeff] ; coeff - ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast - ldr r5, [r0, #vp8_block_round] ; round - ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff - ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff - ldr r8, [r1, #vp8_blockd_dequant] ; dequant - - ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction - ; is used to update the counter so that - ; it can be used to mark nonzero - ; quantized coefficient pairs. - - mov r1, #0 ; flags for quantized coeffs - - ; PART 1: quantization and dequantization loop -loop - ldr r9, [r3], #4 ; [z1 | z0] - ldr r10, [r5], #4 ; [r1 | r0] - ldr r11, [r4], #4 ; [q1 | q0] - - ssat16 lr, #1, r9 ; [sz1 | sz0] - eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0] - ssub16 r9, r9, lr ; x = (z ^ sz) - sz - sadd16 r9, r9, r10 ; [x1+r1 | x0+r0] - - ldr r12, [r3], #4 ; [z3 | z2] - - smulbb r0, r9, r11 ; [(x0+r0)*q0] - smultt r9, r9, r11 ; [(x1+r1)*q1] - - ldr r10, [r5], #4 ; [r3 | r2] - - ssat16 r11, #1, r12 ; [sz3 | sz2] - eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] - pkhtb r0, r9, r0, asr #16 ; [y1 | y0] - ldr r9, [r4], #4 ; [q3 | q2] - ssub16 r12, r12, r11 ; x = (z ^ sz) - sz - - sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] - - eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)] - - smulbb r10, r12, r9 ; [(x2+r2)*q2] - smultt r12, r12, r9 ; [(x3+r3)*q3] - - ssub16 r0, r0, lr ; x = (y ^ sz) - sz - - cmp r0, #0 ; check if zero - orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs - - str r0, [r6], #4 ; *qcoeff++ = x - ldr r9, [r8], #4 ; [dq1 | dq0] - - pkhtb r10, r12, r10, asr #16 ; [y3 | y2] - eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)] - ssub16 r10, r10, r11 ; x = (y ^ sz) - sz - - cmp r10, #0 ; check if zero - orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs - - str r10, [r6], #4 ; *qcoeff++ = x - ldr r11, [r8], #4 ; [dq3 | dq2] - - smulbb r12, r0, r9 ; [x0*dq0] - smultt r0, r0, r9 ; [x1*dq1] - - smulbb r9, r10, r11 ; [x2*dq2] - smultt r10, r10, r11 ; [x3*dq3] - - lsls r2, r2, #2 ; update loop counter - strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] - strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1] - strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2] - strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3] - add r7, r7, #8 ; dqcoeff += 8 - bne loop - - ; PART 2: check position for eob... - mov lr, #0 ; init eob - cmp r1, #0 ; coeffs after quantization? 
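PART 1 above quantizes two 16-bit coefficients per instruction pair; per coefficient it reduces to the familiar fast-quantize arithmetic. A scalar sketch, with array names taken from the block-field offsets and the function name invented:

    static void fast_quantize_part1(const short *coeff, const short *round,
                                    const short *quant_fast,
                                    const short *dequant,
                                    short *qcoeff, short *dqcoeff) {
      int i;
      for (i = 0; i < 16; i++) {
        const int z  = coeff[i];
        const int sz = z >> 31;               /* ssat16 #1: 0 or -1 mask  */
        int x = (z ^ sz) - sz;                /* x = abs(z)               */
        x = ((x + round[i]) * quant_fast[i]) >> 16;  /* keep top halfword */
        x = (x ^ sz) - sz;                    /* restore the sign         */
        qcoeff[i]  = (short)x;
        dqcoeff[i] = (short)(x * dequant[i]);
      }
    }

The lsls/orrne bookkeeping on r1 and r2 in the loop records which coefficient pairs came out nonzero, so that PART 2 can locate the end of block without rescanning every value.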
- ldr r11, [sp, #0] ; restore BLOCKD pointer - beq end ; skip eob calculations if all zero - - ldr r0, [r11, #vp8_blockd_qcoeff] - - ; check shortcut for nonzero qcoeffs - tst r1, #0x80 - bne quant_coeff_15_14 - tst r1, #0x20 - bne quant_coeff_13_11 - tst r1, #0x8 - bne quant_coeff_12_7 - tst r1, #0x40 - bne quant_coeff_10_9 - tst r1, #0x10 - bne quant_coeff_8_3 - tst r1, #0x2 - bne quant_coeff_6_5 - tst r1, #0x4 - bne quant_coeff_4_2 - b quant_coeff_1_0 - -quant_coeff_15_14 - ldrh r2, [r0, #30] ; rc=15, i=15 - mov lr, #16 - cmp r2, #0 - bne end - - ldrh r3, [r0, #28] ; rc=14, i=14 - mov lr, #15 - cmp r3, #0 - bne end - -quant_coeff_13_11 - ldrh r2, [r0, #22] ; rc=11, i=13 - mov lr, #14 - cmp r2, #0 - bne end - -quant_coeff_12_7 - ldrh r3, [r0, #14] ; rc=7, i=12 - mov lr, #13 - cmp r3, #0 - bne end - - ldrh r2, [r0, #20] ; rc=10, i=11 - mov lr, #12 - cmp r2, #0 - bne end - -quant_coeff_10_9 - ldrh r3, [r0, #26] ; rc=13, i=10 - mov lr, #11 - cmp r3, #0 - bne end - - ldrh r2, [r0, #24] ; rc=12, i=9 - mov lr, #10 - cmp r2, #0 - bne end - -quant_coeff_8_3 - ldrh r3, [r0, #18] ; rc=9, i=8 - mov lr, #9 - cmp r3, #0 - bne end - - ldrh r2, [r0, #12] ; rc=6, i=7 - mov lr, #8 - cmp r2, #0 - bne end - -quant_coeff_6_5 - ldrh r3, [r0, #6] ; rc=3, i=6 - mov lr, #7 - cmp r3, #0 - bne end - - ldrh r2, [r0, #4] ; rc=2, i=5 - mov lr, #6 - cmp r2, #0 - bne end - -quant_coeff_4_2 - ldrh r3, [r0, #10] ; rc=5, i=4 - mov lr, #5 - cmp r3, #0 - bne end - - ldrh r2, [r0, #16] ; rc=8, i=3 - mov lr, #4 - cmp r2, #0 - bne end - - ldrh r3, [r0, #8] ; rc=4, i=2 - mov lr, #3 - cmp r3, #0 - bne end - -quant_coeff_1_0 - ldrh r2, [r0, #2] ; rc=1, i=1 - mov lr, #2 - cmp r2, #0 - bne end - - mov lr, #1 ; rc=0, i=0 - -end - str lr, [r11, #vp8_blockd_eob] - ldmfd sp!, {r1, r4-r11, pc} - - ENDP - -loop_count - DCD 0x1000000 - - END diff --git a/vp9/encoder/arm/armv6/vp9_mse16x16_armv6.asm b/vp9/encoder/arm/armv6/vp9_mse16x16_armv6.asm deleted file mode 100644 index 8e7283667..000000000 --- a/vp9/encoder/arm/armv6/vp9_mse16x16_armv6.asm +++ /dev/null @@ -1,138 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_mse16x16_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -; -;note: Based on vp9_variance16x16_armv6. In this function, sum is never used. -; So, we can remove this part of calculation. 
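In scalar form the loop above is a plain sum of squared differences; as the note says, no sum term is kept because MSE, unlike variance, needs only the SSE. A sketch with an invented function name:

    unsigned int mse16x16(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride,
                          unsigned int *sse) {
      unsigned int acc = 0;
      int r, c;
      for (r = 0; r < 16; r++) {
        for (c = 0; c < 16; c++) {
          const int d = src[c] - ref[c];     /* usub8/sel difference      */
          acc += (unsigned int)(d * d);      /* uxtb16/smlad accumulation */
        }
        src += src_stride;
        ref += ref_stride;
      }
      *sse = acc;                            /* stored via the stack arg  */
      return acc;                            /* no (sum*sum) correction   */
    }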
- -|vp8_mse16x16_armv6| PROC - - push {r4-r9, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #16 ; set loop counter to 16 (=block height) - mov r4, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r5, [r0, #0x0] ; load 4 src pixels - ldr r6, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r5, r6 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0x4] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r2, #0x4] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - ldr r5, [r0, #0x8] ; load 4 src pixels - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r6, [r2, #0x8] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - ldr r5, [r0, #0xc] ; load 4 src pixels - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r6, [r2, #0xc] ; load 4 ref pixels - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r5, r6 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r8, lr ; select bytes with positive difference - usub8 r9, r6, r5 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r5, r7, lr ; calculate sum of positive differences - usad8 r6, r8, lr ; calculate sum of negative differences - orr r8, r8, r7 ; differences of all 4 pixels - - subs r12, r12, #1 ; next row - - ; calculate sse - uxtb16 r6, r8 ; byte (two pixels) to halfwords - uxtb16 r7, r8, ror #8 ; another two pixels to halfwords - smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1) - smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; 
return stuff - ldr r1, [sp, #28] ; get address of sse - mov r0, r4 ; return sse - str r4, [r1] ; store sse - - pop {r4-r9, pc} - - ENDP - - END diff --git a/vp9/encoder/arm/armv6/vp9_sad16x16_armv6.asm b/vp9/encoder/arm/armv6/vp9_sad16x16_armv6.asm deleted file mode 100644 index 4dcceb2bf..000000000 --- a/vp9/encoder/arm/armv6/vp9_sad16x16_armv6.asm +++ /dev/null @@ -1,95 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_sad16x16_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 const unsigned char *src_ptr -; r1 int src_stride -; r2 const unsigned char *ref_ptr -; r3 int ref_stride -; stack max_sad (not used) -|vp8_sad16x16_armv6| PROC - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - pld [r0, r1, lsl #1] - pld [r2, r3, lsl #1] - - mov r4, #0 ; sad = 0; - mov r5, #8 ; loop count - -loop - ; 1st row - ldr r6, [r0, #0x0] ; load 4 src pixels (1A) - ldr r8, [r2, #0x0] ; load 4 ref pixels (1A) - ldr r7, [r0, #0x4] ; load 4 src pixels (1A) - ldr r9, [r2, #0x4] ; load 4 ref pixels (1A) - ldr r10, [r0, #0x8] ; load 4 src pixels (1B) - ldr r11, [r0, #0xC] ; load 4 src pixels (1B) - - usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels - usad8 r8, r7, r9 ; calculate sad for 4 pixels - - ldr r12, [r2, #0x8] ; load 4 ref pixels (1B) - ldr lr, [r2, #0xC] ; load 4 ref pixels (1B) - - add r0, r0, r1 ; set src pointer to next row - add r2, r2, r3 ; set dst pointer to next row - - pld [r0, r1, lsl #1] - pld [r2, r3, lsl #1] - - usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels - usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels - - ldr r6, [r0, #0x0] ; load 4 src pixels (2A) - ldr r7, [r0, #0x4] ; load 4 src pixels (2A) - add r4, r4, r8 ; add partial sad values - - ; 2nd row - ldr r8, [r2, #0x0] ; load 4 ref pixels (2A) - ldr r9, [r2, #0x4] ; load 4 ref pixels (2A) - ldr r10, [r0, #0x8] ; load 4 src pixels (2B) - ldr r11, [r0, #0xC] ; load 4 src pixels (2B) - - usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels - usad8 r8, r7, r9 ; calculate sad for 4 pixels - - ldr r12, [r2, #0x8] ; load 4 ref pixels (2B) - ldr lr, [r2, #0xC] ; load 4 ref pixels (2B) - - add r0, r0, r1 ; set src pointer to next row - add r2, r2, r3 ; set dst pointer to next row - - usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels - usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels - - pld [r0, r1, lsl #1] - pld [r2, r3, lsl #1] - - subs r5, r5, #1 ; decrement loop counter - add r4, r4, r8 ; add partial sad values - - bne loop - - mov r0, r4 ; return sad - ldmfd sp!, {r4-r12, pc} - - ENDP - - END diff --git a/vp9/encoder/arm/armv6/vp9_short_fdct4x4_armv6.asm b/vp9/encoder/arm/armv6/vp9_short_fdct4x4_armv6.asm deleted file mode 100644 index 8034c1db9..000000000 --- a/vp9/encoder/arm/armv6/vp9_short_fdct4x4_armv6.asm +++ /dev/null @@ -1,262 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. 
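The SAD routine deleted above leans on usada8, which accumulates four absolute byte differences per instruction and processes two rows per loop iteration; its scalar equivalent is simply:

    unsigned int sad16x16(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride) {
      unsigned int sad = 0;
      int r, c;
      for (r = 0; r < 16; r++) {
        for (c = 0; c < 16; c++) {
          const int d = src[c] - ref[c];
          sad += (unsigned int)(d < 0 ? -d : d);  /* usada8 does 4 at once */
        }
        src += src_stride;
        ref += ref_stride;
      }
      return sad;
    }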
All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp8_short_fdct4x4_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY -; void vp8_short_fdct4x4_c(short *input, short *output, int pitch) -|vp8_short_fdct4x4_armv6| PROC - - stmfd sp!, {r4 - r12, lr} - - ; PART 1 - - ; coeffs 0-3 - ldrd r4, r5, [r0] ; [i1 | i0] [i3 | i2] - - ldr r10, c7500 - ldr r11, c14500 - ldr r12, c0x22a453a0 ; [2217*4 | 5352*4] - ldr lr, c0x00080008 - ror r5, r5, #16 ; [i2 | i3] - - qadd16 r6, r4, r5 ; [i1+i2 | i0+i3] = [b1 | a1] without shift - qsub16 r7, r4, r5 ; [i1-i2 | i0-i3] = [c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2*[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r4, r6, lr ; o0 = (i1+i2)*8 + (i0+i3)*8 - smusd r5, r6, lr ; o2 = (i1+i2)*8 - (i0+i3)*8 - - smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6] - - pkhbt r3, r4, r6, lsl #4 ; [o1 | o0], keep in register for PART 2 - pkhbt r6, r5, r7, lsl #4 ; [o3 | o2] - - str r6, [r1, #4] - - ; coeffs 4-7 - ror r9, r9, #16 ; [i6 | i7] - - qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift - qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r9, r6, lr ; o4 = (i5+i6)*8 + (i4+i7)*8 - smusd r8, r6, lr ; o6 = (i5+i6)*8 - (i4+i7)*8 - - smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r4, r5, [r0] ; [i9 | i8] [i11 | i10] - - pkhbt r9, r9, r6, lsl #4 ; [o5 | o4], keep in register for PART 2 - pkhbt r6, r8, r7, lsl #4 ; [o7 | o6] - - str r6, [r1, #12] - - ; coeffs 8-11 - ror r5, r5, #16 ; [i10 | i11] - - qadd16 r6, r4, r5 ; [i9+i10 | i8+i11]=[b1 | a1] without shift - qsub16 r7, r4, r5 ; [i9-i10 | i8-i11]=[c1 | d1] without shift - - add r0, r0, r2 ; update input pointer - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r2, r6, lr ; o8 = (i9+i10)*8 + (i8+i11)*8 - smusd r8, r6, lr ; o10 = (i9+i10)*8 - (i8+i11)*8 - - smlad r6, r7, r12, r11 ; o9 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o11 = (d1 * 2217 - c1 * 5352 + 7500) - - ldrd r4, r5, [r0] ; [i13 | i12] [i15 | i14] - - pkhbt r2, r2, r6, lsl #4 ; [o9 | o8], keep in register for PART 2 - pkhbt r6, r8, r7, lsl #4 ; [o11 | o10] - - str r6, [r1, #20] - - ; coeffs 12-15 - ror r5, r5, #16 ; [i14 | i15] - - qadd16 r6, r4, r5 ; [i13+i14 | i12+i15]=[b1|a1] without shift - qsub16 r7, r4, r5 ; [i13-i14 | i12-i15]=[c1|d1] without shift - - qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd - ; with 2217*4 and 5352*4 without losing the - ; sign bit (overflow) - - smuad r4, r6, lr ; o12 = (i13+i14)*8 + (i12+i15)*8 - smusd r5, r6, lr ; o14 = (i13+i14)*8 - (i12+i15)*8 - - smlad r6, r7, r12, r11 ; o13 = (c1 * 2217 + d1 * 5352 + 14500) - smlsdx r7, r7, r12, r10 ; o15 = (d1 * 2217 - c1 * 5352 + 7500) - - pkhbt r0, r4, r6, lsl #4 ; [o13 | o12], keep in register for PART 2 - pkhbt r6, r5, r7, lsl #4 ; [o15 | o14] - - str r6, [r1, #28] - - - ; PART 2 ------------------------------------------------- - ldr r11, c12000 - 
ldr r10, c51000 - ldr lr, c0x00070007 - - qadd16 r4, r3, r0 ; a1 = [i1+i13 | i0+i12] - qadd16 r5, r9, r2 ; b1 = [i5+i9 | i4+i8] - qsub16 r6, r9, r2 ; c1 = [i5-i9 | i4-i8] - qsub16 r7, r3, r0 ; d1 = [i1-i13 | i0-i12] - - qadd16 r4, r4, lr ; a1 + 7 - - add r0, r11, #0x10000 ; add (d!=0) - - qadd16 r2, r4, r5 ; a1 + b1 + 7 - qsub16 r3, r4, r5 ; a1 - b1 + 7 - - ldr r12, c0x08a914e8 ; [2217 | 5352] - - lsl r8, r2, #16 ; prepare bottom halfword for scaling - asr r2, r2, #4 ; scale top halfword - lsl r9, r3, #16 ; prepare bottom halfword for scaling - asr r3, r3, #4 ; scale top halfword - pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword - pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword - - smulbt r2, r6, r12 ; [ ------ | c1*2217] - str r4, [r1, #0] ; [ o1 | o0] - smultt r3, r6, r12 ; [c1*2217 | ------ ] - str r5, [r1, #16] ; [ o9 | o8] - - smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] - smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ] - - smulbb r2, r6, r12 ; [ ------ | c1*5352] - smultb r3, r6, r12 ; [c1*5352 | ------ ] - - lsls r6, r7, #16 ; d1 != 0 ? - addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0) - addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0) - asrs r6, r7, #16 - addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0) - addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0) - - smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000 - smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000 - - pkhtb r9, r9, r8, asr #16 - - sub r4, r4, r2 - sub r5, r5, r3 - - ldr r3, [r1, #4] ; [i3 | i2] - - pkhtb r5, r5, r4, asr #16 ; [o13|o12] - - str r9, [r1, #8] ; [o5 | 04] - - ldr r9, [r1, #12] ; [i7 | i6] - ldr r8, [r1, #28] ; [i15|i14] - ldr r2, [r1, #20] ; [i11|i10] - str r5, [r1, #24] ; [o13|o12] - - qadd16 r4, r3, r8 ; a1 = [i3+i15 | i2+i14] - qadd16 r5, r9, r2 ; b1 = [i7+i11 | i6+i10] - - qadd16 r4, r4, lr ; a1 + 7 - - qsub16 r6, r9, r2 ; c1 = [i7-i11 | i6-i10] - qadd16 r2, r4, r5 ; a1 + b1 + 7 - qsub16 r7, r3, r8 ; d1 = [i3-i15 | i2-i14] - qsub16 r3, r4, r5 ; a1 - b1 + 7 - - lsl r8, r2, #16 ; prepare bottom halfword for scaling - asr r2, r2, #4 ; scale top halfword - lsl r9, r3, #16 ; prepare bottom halfword for scaling - asr r3, r3, #4 ; scale top halfword - pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword - pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword - - smulbt r2, r6, r12 ; [ ------ | c1*2217] - str r4, [r1, #4] ; [ o3 | o2] - smultt r3, r6, r12 ; [c1*2217 | ------ ] - str r5, [r1, #20] ; [ o11 | o10] - - smlabb r8, r7, r12, r2 ; [ ------ | d1*5352] - smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ] - - smulbb r2, r6, r12 ; [ ------ | c1*5352] - smultb r3, r6, r12 ; [c1*5352 | ------ ] - - lsls r6, r7, #16 ; d1 != 0 ? 
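The addeq/addne pairs that follow implement the "+ (d!=0)" noted above: adding 12000 + 0x10000 (r0) instead of 12000 (r11) before the final arithmetic shift right by 16 is the same as adding 1 afterwards. In C the two second-pass outputs of each column pair work out to this (a sketch; the names are invented):

    static void fdct_pass2_round(int c1, int d1, short *op4, short *op12) {
      /* (v + 12000 + 0x10000) >> 16 == ((v + 12000) >> 16) + 1 */
      *op4  = (short)(((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0));
      *op12 = (short)((d1 * 2217 - c1 * 5352 + 51000) >> 16);
    }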
- addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0) - addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0) - - asrs r6, r7, #16 - addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0) - addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0) - - smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000 - smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000 - - pkhtb r9, r9, r8, asr #16 - - sub r4, r4, r2 - sub r5, r5, r3 - - str r9, [r1, #12] ; [o7 | o6] - pkhtb r5, r5, r4, asr #16 ; [o15|o14] - - str r5, [r1, #28] ; [o15|o14] - - ldmfd sp!, {r4 - r12, pc} - - ENDP - -; Used constants -c7500 - DCD 7500 -c14500 - DCD 14500 -c0x22a453a0 - DCD 0x22a453a0 -c0x00080008 - DCD 0x00080008 -c12000 - DCD 12000 -c51000 - DCD 51000 -c0x00070007 - DCD 0x00070007 -c0x08a914e8 - DCD 0x08a914e8 - - END diff --git a/vp9/encoder/arm/armv6/vp9_subtract_armv6.asm b/vp9/encoder/arm/armv6/vp9_subtract_armv6.asm deleted file mode 100644 index e53c1ed5b..000000000 --- a/vp9/encoder/arm/armv6/vp9_subtract_armv6.asm +++ /dev/null @@ -1,264 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_subtract_mby_armv6| - EXPORT |vp8_subtract_mbuv_armv6| - EXPORT |vp8_subtract_b_armv6| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 BLOCK *be -; r1 BLOCKD *bd -; r2 int pitch -|vp8_subtract_b_armv6| PROC - - stmfd sp!, {r4-r9} - - ldr r4, [r0, #vp8_block_base_src] - ldr r5, [r0, #vp8_block_src] - ldr r6, [r0, #vp8_block_src_diff] - - ldr r3, [r4] - ldr r7, [r0, #vp8_block_src_stride] - add r3, r3, r5 ; src = *base_src + src - ldr r8, [r1, #vp8_blockd_predictor] - - mov r9, #4 ; loop count - -loop_block - - ldr r0, [r3], r7 ; src - ldr r1, [r8], r2 ; pred - - uxtb16 r4, r0 ; [s2 | s0] - uxtb16 r5, r1 ; [p2 | p0] - uxtb16 r0, r0, ror #8 ; [s3 | s1] - uxtb16 r1, r1, ror #8 ; [p3 | p1] - - usub16 r4, r4, r5 ; [d2 | d0] - usub16 r5, r0, r1 ; [d3 | d1] - - subs r9, r9, #1 ; decrement loop counter - - pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] - pkhtb r1, r5, r4, asr #16 ; [d3 | d2] - - str r0, [r6, #0] ; diff - str r1, [r6, #4] ; diff - - add r6, r6, r2, lsl #1 ; update diff pointer - bne loop_block - - ldmfd sp!, {r4-r9} - mov pc, lr - - ENDP - - -; r0 short *diff -; r1 unsigned char *usrc -; r2 unsigned char *vsrc -; r3 unsigned char *pred -; stack int stride -|vp8_subtract_mbuv_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - add r0, r0, #512 ; set *diff point to Cb - add r3, r3, #256 ; set *pred point to Cb - - mov r4, #8 ; loop count - ldr r5, [sp, #40] ; stride - - ; Subtract U block -loop_u - ldr r6, [r1] ; src (A) - ldr r7, [r3], #4 ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r1, #4] ; src (B) - ldr r11, [r3], #4 ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 
r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - add r1, r1, r5 ; update usrc pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (B) - - bne loop_u - - mov r4, #8 ; loop count - - ; Subtract V block -loop_v - ldr r6, [r2] ; src (A) - ldr r7, [r3], #4 ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r2, #4] ; src (B) - ldr r11, [r3], #4 ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - add r2, r2, r5 ; update vsrc pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (B) - - bne loop_v - - ldmfd sp!, {r4-r12, pc} - - ENDP - - -; r0 short *diff -; r1 unsigned char *src -; r2 unsigned char *pred -; r3 int stride -|vp8_subtract_mby_armv6| PROC - - stmfd sp!, {r4-r11} - - mov r4, #16 -loop - ldr r6, [r1] ; src (A) - ldr r7, [r2], #4 ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r1, #4] ; src (B) - ldr r11, [r2], #4 ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - ldr r10, [r1, #8] ; src (C) - ldr r11, [r2], #4 ; pred (C) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - uxtb16 r8, r10 ; [s2 | s0] (C) - str r9, [r0], #4 ; diff (B) - - uxtb16 r9, r11 ; [p2 | p0] (C) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (C) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (C) - - usub16 r6, r8, r9 ; [d2 | d0] (C) - usub16 r7, r10, r11 ; [d3 | d1] (C) - - ldr r10, [r1, #12] ; src (D) - ldr r11, [r2], #4 ; pred (D) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) - - str r8, [r0], #4 ; diff (C) - uxtb16 r8, r10 ; [s2 | s0] (D) - str r9, [r0], #4 ; diff (C) - - uxtb16 r9, r11 ; [p2 | p0] (D) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (D) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (D) - - usub16 r6, r8, r9 ; [d2 | d0] (D) - usub16 r7, r10, r11 ; [d3 | d1] (D) - - add r1, r1, r3 ; update src pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) - - str r8, [r0], #4 ; diff (D) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (D) - - bne loop - - ldmfd sp!, {r4-r11} - mov pc, lr - - ENDP - - END diff --git 
a/vp9/encoder/arm/armv6/vp9_variance16x16_armv6.asm b/vp9/encoder/arm/armv6/vp9_variance16x16_armv6.asm deleted file mode 100644 index aa4727e66..000000000 --- a/vp9/encoder/arm/armv6/vp9_variance16x16_armv6.asm +++ /dev/null @@ -1,153 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_variance16x16_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance16x16_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - -loop - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r5, [r2, #0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r5, [r2, #4] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r5, [r2, #8] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; 
differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r5, [r2, #12] ; load 4 ref pixels - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r9, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r10, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) - - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - - END diff --git a/vp9/encoder/arm/armv6/vp9_variance8x8_armv6.asm b/vp9/encoder/arm/armv6/vp9_variance8x8_armv6.asm deleted file mode 100644 index 101f6838d..000000000 --- a/vp9/encoder/arm/armv6/vp9_variance8x8_armv6.asm +++ /dev/null @@ -1,101 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
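Both variance routines in this patch share the epilogue shown above: sum and sse are accumulated across the block, and the variance comes back as sse minus the squared-mean term, with the division by the pixel count done as a shift (asr #8 for the 16x16 block here, asr #6 in the 8x8 routine that follows). As a sketch:

    static unsigned int variance_epilogue(unsigned int sse, int sum,
                                          int log2_count,
                                          unsigned int *sse_out) {
      *sse_out = sse;                        /* str to the stack argument */
      return sse - (unsigned int)((sum * sum) >> log2_count);
    }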
-; - - - EXPORT |vp9_variance8x8_armv6| - - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance8x8_armv6| PROC - - push {r4-r10, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r12, #8 ; set loop counter to 8 (=block height) - mov r4, #0 ; initialize sum = 0 - mov r5, #0 ; initialize sse = 0 - -loop - ; 1st 4 pixels - ldr r6, [r0, #0x0] ; load 4 src pixels - ldr r7, [r2, #0x0] ; load 4 ref pixels - - mov lr, #0 ; constant zero - - usub8 r8, r6, r7 ; calculate difference - pld [r0, r1, lsl #1] - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of negative differences - orr r8, r8, r10 ; differences of all 4 pixels - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; substract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r6, [r0, #0x4] ; load 4 src pixels - ldr r7, [r2, #0x4] ; load 4 ref pixels - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - usub8 r8, r6, r7 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r10, r8, lr ; select bytes with positive difference - usub8 r9, r7, r6 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r8, r9, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r6, r10, lr ; calculate sum of positive differences - usad8 r7, r8, lr ; calculate sum of negative differences - orr r8, r8, r10 ; differences of all 4 pixels - - ; calculate total sum - add r4, r4, r6 ; add positive differences to sum - sub r4, r4, r7 ; substract negative differences from sum - - ; calculate sse - uxtb16 r7, r8 ; byte (two pixels) to halfwords - uxtb16 r10, r8, ror #8 ; another two pixels to halfwords - smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) - subs r12, r12, #1 ; next row - smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r8, [sp, #32] ; get address of sse - mul r1, r4, r4 ; sum * sum - str r5, [r8] ; store sse - sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) - - pop {r4-r10, pc} - - ENDP - - END diff --git a/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_h_armv6.asm b/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_h_armv6.asm deleted file mode 100644 index e25436c22..000000000 --- a/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_h_armv6.asm +++ /dev/null @@ -1,181 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
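Both variance kernels above lean on the same ARMv6 SIMD-within-register idiom: usub8 subtracts four bytes at once and sets the GE flags, sel then keeps only the bytes where src >= ref (the positive differences) or, with operands reversed, where ref > src (the negative ones); usad8 sums each group of four bytes, and ORing the two sel results gives |src - ref| per byte for the squared-error term, since exactly one side is nonzero per byte. A scalar sketch of what one such round trip contributes, per 4-byte word:

    /* Scalar equivalent of one usub8/sel/usad8 sequence over 4 bytes. */
    static void accumulate4(const unsigned char *a, const unsigned char *b,
                            int *sum, unsigned int *sse) {
      for (int k = 0; k < 4; k++) {
        unsigned int pos = a[k] > b[k] ? a[k] - b[k] : 0; /* sel after usub8 a,b */
        unsigned int neg = b[k] > a[k] ? b[k] - a[k] : 0; /* sel after usub8 b,a */
        unsigned int ad  = pos | neg;     /* orr: |a-b|, one side is always 0   */
        *sum += (int)pos - (int)neg;      /* usad8 of pos minus usad8 of neg    */
        *sse += ad * ad;                  /* uxtb16 + smlad accumulation        */
      }
    }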
-; - - - EXPORT |vp9_variance_halfpixvar16x16_h_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance_halfpixvar16x16_h_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - ldr r10, c80808080 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - mov lr, #0 ; constant zero -loop - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r6, [r0, #1] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #0] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r6, [r0, #5] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #4] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r6, [r0, #9] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #8] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, 
r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r6, [r0, #13] ; load 4 src pixels with 1 byte offset - ldr r5, [r2, #12] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - -c80808080 - DCD 0x80808080 - - END diff --git a/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_hv_armv6.asm b/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_hv_armv6.asm deleted file mode 100644 index 6ad5e90bb..000000000 --- a/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_hv_armv6.asm +++ /dev/null @@ -1,222 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
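The half-pixel variants never form (a + b + 1) >> 1 directly. They bitwise-invert one operand (mvn), apply the unsigned halving subtract uhsub8, and XOR with 0x80808080 (the c80808080 literal). Per byte, uhsub8(a, ~b) yields (a + b - 255) >> 1 in a 9-bit intermediate, and flipping bit 7 converts that offset value back into the rounded average. A scalar check of the identity, exhaustively over all byte pairs (a sketch, not libvpx code):

    #include <assert.h>

    /* ((a + b - 255) >> 1, truncated to 8 bits) ^ 0x80 == (a + b + 1) >> 1 */
    static unsigned char avg_trick(unsigned char a, unsigned char b) {
      unsigned char h = (unsigned char)(((int)a - (int)(unsigned char)~b) >> 1); /* uhsub8 */
      return h ^ 0x80;                               /* eor with one byte of c80808080 */
    }

    int main(void) {
      for (int a = 0; a < 256; a++)
        for (int b = 0; b < 256; b++)
          assert(avg_trick((unsigned char)a, (unsigned char)b) ==
                 (unsigned char)((a + b + 1) >> 1));
      return 0;
    }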
-; - - - EXPORT |vp9_variance_halfpixvar16x16_hv_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance_halfpixvar16x16_hv_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - ldr r10, c80808080 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - mov lr, #0 ; constant zero -loop - add r9, r0, r1 ; pointer to pixels on the next row - ; 1st 4 pixels - ldr r4, [r0, #0] ; load source pixels a, row N - ldr r6, [r0, #1] ; load source pixels b, row N - ldr r5, [r9, #0] ; load source pixels c, row N+1 - ldr r7, [r9, #1] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #0] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load source pixels a, row N - ldr r6, [r0, #5] ; load source pixels b, row N - ldr r5, [r9, #4] ; load source pixels c, row N+1 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - ldr r7, [r9, #5] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #4] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load source pixels a, row N - ldr 
r6, [r0, #9] ; load source pixels b, row N - ldr r5, [r9, #8] ; load source pixels c, row N+1 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - ldr r7, [r9, #9] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #8] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load source pixels a, row N - ldr r6, [r0, #13] ; load source pixels b, row N - ldr r5, [r9, #12] ; load source pixels c, row N+1 - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - ldr r7, [r9, #13] ; load source pixels d, row N+1 - - ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 - mvn r7, r7 - uhsub8 r5, r5, r7 - eor r5, r5, r10 - ; z = (x + y + 1) >> 1, interpolate half pixel values vertically - mvn r5, r5 - uhsub8 r4, r4, r5 - ldr r5, [r2, #12] ; load 4 ref pixels - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - subs r12, r12, #1 - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - -c80808080 - DCD 0x80808080 - - END diff --git a/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_v_armv6.asm b/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_v_armv6.asm deleted file mode 100644 index c1ac5a1cb..000000000 --- a/vp9/encoder/arm/armv6/vp9_variance_halfpixvar16x16_v_armv6.asm +++ /dev/null @@ -1,183 +0,0 @@ 
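The hv variant above applies that averaging trick twice: x and y are the horizontally averaged pixels of rows N and N+1, and the prediction is z = (x + y + 1) >> 1, exactly as its comments spell out. Because each stage rounds independently, the result can sit one LSB above a true 2x2 bilinear (a + b + c + d + 2) >> 2; a scalar sketch of the cascaded form:

    /* Half-pel (x,y) sample built from two rounded-average stages,
     * mirroring the mvn/uhsub8/eor cascade in the deleted routine. */
    static unsigned char halfpel_hv(unsigned char a, unsigned char b,   /* row N   */
                                    unsigned char c, unsigned char d) { /* row N+1 */
      unsigned char x = (unsigned char)((a + b + 1) >> 1); /* horizontal, row N   */
      unsigned char y = (unsigned char)((c + d + 1) >> 1); /* horizontal, row N+1 */
      return (unsigned char)((x + y + 1) >> 1);            /* vertical combine    */
    }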
-; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_variance_halfpixvar16x16_v_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -|vp9_variance_halfpixvar16x16_v_armv6| PROC - - stmfd sp!, {r4-r12, lr} - - pld [r0, r1, lsl #0] - pld [r2, r3, lsl #0] - - mov r8, #0 ; initialize sum = 0 - ldr r10, c80808080 - mov r11, #0 ; initialize sse = 0 - mov r12, #16 ; set loop counter to 16 (=block height) - mov lr, #0 ; constant zero -loop - add r9, r0, r1 ; set src pointer to next row - ; 1st 4 pixels - ldr r4, [r0, #0] ; load 4 src pixels - ldr r6, [r9, #0] ; load 4 src pixels from next row - ldr r5, [r2, #0] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - usub8 r6, r4, r5 ; calculate difference - pld [r0, r1, lsl #1] - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - pld [r2, r3, lsl #1] - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - ; calculate total sum - adds r8, r8, r4 ; add positive differences to sum - subs r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 2nd 4 pixels - ldr r4, [r0, #4] ; load 4 src pixels - ldr r6, [r9, #4] ; load 4 src pixels from next row - ldr r5, [r2, #4] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 3rd 4 pixels - ldr r4, [r0, #8] ; load 4 src pixels - ldr r6, [r9, #8] ; load 4 src pixels from next row - ldr r5, [r2, #8] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with 
reversed operands - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - - ; 4th 4 pixels - ldr r4, [r0, #12] ; load 4 src pixels - ldr r6, [r9, #12] ; load 4 src pixels from next row - ldr r5, [r2, #12] ; load 4 ref pixels - - ; bilinear interpolation - mvn r6, r6 - uhsub8 r4, r4, r6 - eor r4, r4, r10 - - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - usub8 r6, r4, r5 ; calculate difference - add r0, r0, r1 ; set src_ptr to next row - sel r7, r6, lr ; select bytes with positive difference - usub8 r6, r5, r4 ; calculate difference with reversed operands - add r2, r2, r3 ; set dst_ptr to next row - sel r6, r6, lr ; select bytes with negative difference - - ; calculate partial sums - usad8 r4, r7, lr ; calculate sum of positive differences - usad8 r5, r6, lr ; calculate sum of negative differences - orr r6, r6, r7 ; differences of all 4 pixels - - ; calculate total sum - add r8, r8, r4 ; add positive differences to sum - sub r8, r8, r5 ; substract negative differences from sum - - ; calculate sse - uxtb16 r5, r6 ; byte (two pixels) to halfwords - uxtb16 r7, r6, ror #8 ; another two pixels to halfwords - smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) - smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2) - - - subs r12, r12, #1 - - bne loop - - ; return stuff - ldr r6, [sp, #40] ; get address of sse - mul r0, r8, r8 ; sum * sum - str r11, [r6] ; store sse - sub r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) - - ldmfd sp!, {r4-r12, pc} - - ENDP - -c80808080 - DCD 0x80808080 - - END diff --git a/vp9/encoder/arm/armv6/vp9_walsh_v6.asm b/vp9/encoder/arm/armv6/vp9_walsh_v6.asm deleted file mode 100644 index 5eaf3f25a..000000000 --- a/vp9/encoder/arm/armv6/vp9_walsh_v6.asm +++ /dev/null @@ -1,212 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
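The ARMv6 Walsh-Hadamard kernel below packs two 16-bit lanes per register (qadd16/qsub16) and multiplies against the 0x00040004 constant with smuad to fold the <<2 scaling into the accumulate. Its arithmetic agrees with the NEON version deleted further down: first-pass butterflies scaled by 4 with a +1 bias when a1 is nonzero, second-pass butterflies rounded as (x + (x < 0) + 3) >> 3. A scalar sketch assembled from the asm comments (close to vp8_short_walsh4x4_c, but not checked against that file verbatim):

    static void short_walsh4x4_sketch(const short *input, short *output, int pitch) {
      short *op = output;
      const short *ip = input;
      for (int i = 0; i < 4; i++) {              /* pass one: rows, scaled by 4 */
        int a1 = (ip[0] + ip[2]) << 2;
        int d1 = (ip[1] + ip[3]) << 2;
        int c1 = (ip[1] - ip[3]) << 2;
        int b1 = (ip[0] - ip[2]) << 2;
        op[0] = (short)(a1 + d1 + (a1 != 0));    /* the addne "(a1 != 0)" bias  */
        op[1] = (short)(b1 + c1);
        op[2] = (short)(b1 - c1);
        op[3] = (short)(a1 - d1);
        ip += pitch / 2;                         /* pitch is given in bytes     */
        op += 4;
      }
      ip = output;
      op = output;
      for (int i = 0; i < 4; i++) {              /* pass two: columns           */
        int a1 = ip[0] + ip[8];
        int d1 = ip[4] + ip[12];
        int c1 = ip[4] - ip[12];
        int b1 = ip[0] - ip[8];
        int a2 = a1 + d1, b2 = b1 + c1, c2 = b1 - c1, d2 = a1 - d1;
        a2 += a2 < 0; b2 += b2 < 0;              /* addmi: bump negatives by 1  */
        c2 += c2 < 0; d2 += d2 < 0;
        op[0] = (short)((a2 + 3) >> 3);
        op[4] = (short)((b2 + 3) >> 3);
        op[8] = (short)((c2 + 3) >> 3);
        op[12] = (short)((d2 + 3) >> 3);
        ip++;
        op++;
      }
    }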
-; - - EXPORT |vp8_short_walsh4x4_armv6| - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code - -;short vp8_short_walsh4x4_armv6(short *input, short *output, int pitch) -; r0 short *input, -; r1 short *output, -; r2 int pitch -|vp8_short_walsh4x4_armv6| PROC - - stmdb sp!, {r4 - r11, lr} - - ldrd r4, r5, [r0], r2 - ldr lr, c00040004 - ldrd r6, r7, [r0], r2 - - ; 0-3 - qadd16 r3, r4, r5 ; [d1|a1] [1+3 | 0+2] - qsub16 r4, r4, r5 ; [c1|b1] [1-3 | 0-2] - - ldrd r8, r9, [r0], r2 - ; 4-7 - qadd16 r5, r6, r7 ; [d1|a1] [5+7 | 4+6] - qsub16 r6, r6, r7 ; [c1|b1] [5-7 | 4-6] - - ldrd r10, r11, [r0] - ; 8-11 - qadd16 r7, r8, r9 ; [d1|a1] [9+11 | 8+10] - qsub16 r8, r8, r9 ; [c1|b1] [9-11 | 8-10] - - ; 12-15 - qadd16 r9, r10, r11 ; [d1|a1] [13+15 | 12+14] - qsub16 r10, r10, r11 ; [c1|b1] [13-15 | 12-14] - - - lsls r2, r3, #16 - smuad r11, r3, lr ; A0 = a1<<2 + d1<<2 - addne r11, r11, #1 ; A0 += (a1!=0) - - lsls r2, r7, #16 - smuad r12, r7, lr ; C0 = a1<<2 + d1<<2 - addne r12, r12, #1 ; C0 += (a1!=0) - - add r0, r11, r12 ; a1_0 = A0 + C0 - sub r11, r11, r12 ; b1_0 = A0 - C0 - - lsls r2, r5, #16 - smuad r12, r5, lr ; B0 = a1<<2 + d1<<2 - addne r12, r12, #1 ; B0 += (a1!=0) - - lsls r2, r9, #16 - smuad r2, r9, lr ; D0 = a1<<2 + d1<<2 - addne r2, r2, #1 ; D0 += (a1!=0) - - add lr, r12, r2 ; d1_0 = B0 + D0 - sub r12, r12, r2 ; c1_0 = B0 - D0 - - ; op[0,4,8,12] - adds r2, r0, lr ; a2 = a1_0 + d1_0 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r0, r0, lr ; d2 = a1_0 - d1_0 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1] ; op[0] - - addmi r0, r0, #1 ; += a2 < 0 - add r0, r0, #3 ; += 3 - ldr lr, c00040004 - mov r0, r0, asr #3 ; >> 3 - strh r0, [r1, #24] ; op[12] - - adds r2, r11, r12 ; b2 = b1_0 + c1_0 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r0, r11, r12 ; c2 = b1_0 - c1_0 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #8] ; op[4] - - addmi r0, r0, #1 ; += a2 < 0 - add r0, r0, #3 ; += 3 - smusd r3, r3, lr ; A3 = a1<<2 - d1<<2 - smusd r7, r7, lr ; C3 = a1<<2 - d1<<2 - mov r0, r0, asr #3 ; >> 3 - strh r0, [r1, #16] ; op[8] - - - ; op[3,7,11,15] - add r0, r3, r7 ; a1_3 = A3 + C3 - sub r3, r3, r7 ; b1_3 = A3 - C3 - - smusd r5, r5, lr ; B3 = a1<<2 - d1<<2 - smusd r9, r9, lr ; D3 = a1<<2 - d1<<2 - add r7, r5, r9 ; d1_3 = B3 + D3 - sub r5, r5, r9 ; c1_3 = B3 - D3 - - adds r2, r0, r7 ; a2 = a1_3 + d1_3 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r3, r5 ; b2 = b1_3 + c1_3 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #6] ; op[3] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r3, r5 ; c2 = b1_3 - c1_3 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #14] ; op[7] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r0, r7 ; d2 = a1_3 - d1_3 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #22] ; op[11] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - smuad r3, r4, lr ; A1 = b1<<2 + c1<<2 - smuad r5, r8, lr ; C1 = b1<<2 + c1<<2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #30] ; op[15] - - ; op[1,5,9,13] - add r0, r3, r5 ; a1_1 = A1 + C1 - sub r3, r3, r5 ; b1_1 = A1 - C1 - - smuad r7, r6, lr ; B1 = b1<<2 + c1<<2 - smuad r9, r10, lr ; D1 = b1<<2 + c1<<2 - add r5, r7, r9 ; d1_1 = B1 + D1 - sub r7, r7, r9 ; c1_1 = B1 - D1 - - adds r2, r0, r5 ; a2 = a1_1 + d1_1 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r3, r7 ; b2 = b1_1 + c1_1 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #2] ; op[1] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r3, r7 ; c2 = b1_1 - c1_1 - mov r9, 
r9, asr #3 ; >> 3 - strh r9, [r1, #10] ; op[5] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r0, r5 ; d2 = a1_1 - d1_1 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #18] ; op[9] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - smusd r4, r4, lr ; A2 = b1<<2 - c1<<2 - smusd r8, r8, lr ; C2 = b1<<2 - c1<<2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #26] ; op[13] - - - ; op[2,6,10,14] - add r11, r4, r8 ; a1_2 = A2 + C2 - sub r12, r4, r8 ; b1_2 = A2 - C2 - - smusd r6, r6, lr ; B2 = b1<<2 - c1<<2 - smusd r10, r10, lr ; D2 = b1<<2 - c1<<2 - add r4, r6, r10 ; d1_2 = B2 + D2 - sub r8, r6, r10 ; c1_2 = B2 - D2 - - adds r2, r11, r4 ; a2 = a1_2 + d1_2 - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - adds r9, r12, r8 ; b2 = b1_2 + c1_2 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #4] ; op[2] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - subs r2, r12, r8 ; c2 = b1_2 - c1_2 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #12] ; op[6] - - addmi r2, r2, #1 ; += a2 < 0 - add r2, r2, #3 ; += 3 - subs r9, r11, r4 ; d2 = a1_2 - d1_2 - mov r2, r2, asr #3 ; >> 3 - strh r2, [r1, #20] ; op[10] - - addmi r9, r9, #1 ; += a2 < 0 - add r9, r9, #3 ; += 3 - mov r9, r9, asr #3 ; >> 3 - strh r9, [r1, #28] ; op[14] - - - ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_short_walsh4x4_armv6| - -c00040004 - DCD 0x00040004 - - END diff --git a/vp9/encoder/arm/neon/vp9_fastquantizeb_neon.asm b/vp9/encoder/arm/neon/vp9_fastquantizeb_neon.asm deleted file mode 100644 index c68233617..000000000 --- a/vp9/encoder/arm/neon/vp9_fastquantizeb_neon.asm +++ /dev/null @@ -1,261 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
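vp9_fastquantizeb_neon.asm, removed below, implements the VP8 fast quantizer: x = abs(z), y = ((x + round) * quant) >> 16, the sign is restored with the xor/subtract idiom, dqcoeff = qcoeff * dequant, and the end-of-block index is the largest inverse-zigzag position holding a nonzero coefficient (vtst builds the mask, vmax reduces it). The vqdmulh doubling is undone by the explicit vshr #1. A scalar sketch of that flow, with illustrative parameter names:

    /* Scalar sketch of the fast-quantize path in the deleted NEON file.
     * inv_zig_zag holds 1-based scan positions, as in the asm's table. */
    static int fast_quantize_b_sketch(const short *coeff, const short *round,
                                      const short *quant, const short *dequant,
                                      const unsigned short *inv_zig_zag,
                                      short *qcoeff, short *dqcoeff) {
      int eob = 0;
      for (int i = 0; i < 16; i++) {
        int z = coeff[i];
        int sz = z >> 31;                       /* vshr.s16 #15: 0 or -1        */
        int x = (z ^ sz) - sz;                  /* abs(z)                       */
        int y = ((x + round[i]) * quant[i]) >> 16;
        x = (y ^ sz) - sz;                      /* restore the original sign    */
        qcoeff[i] = (short)x;
        dqcoeff[i] = (short)(x * dequant[i]);
        if (x && inv_zig_zag[i] > eob) eob = inv_zig_zag[i]; /* vtst + vmax     */
      }
      return eob;
    }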
-; - - - EXPORT |vp8_fast_quantize_b_neon| - EXPORT |vp8_fast_quantize_b_pair_neon| - - INCLUDE vp9_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=4 - -;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2); -|vp8_fast_quantize_b_pair_neon| PROC - - stmfd sp!, {r4-r9} - vstmdb sp!, {q4-q7} - - ldr r4, [r0, #vp8_block_coeff] - ldr r5, [r0, #vp8_block_quant_fast] - ldr r6, [r0, #vp8_block_round] - - vld1.16 {q0, q1}, [r4@128] ; load z - - ldr r7, [r2, #vp8_blockd_qcoeff] - - vabs.s16 q4, q0 ; calculate x = abs(z) - vabs.s16 q5, q1 - - ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative - vshr.s16 q2, q0, #15 ; sz - vshr.s16 q3, q1, #15 - - vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15] - vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15] - - ldr r4, [r1, #vp8_block_coeff] - - vadd.s16 q4, q6 ; x + Round - vadd.s16 q5, q7 - - vld1.16 {q0, q1}, [r4@128] ; load z2 - - vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16 - vqdmulh.s16 q5, q9 - - vabs.s16 q10, q0 ; calculate x2 = abs(z_2) - vabs.s16 q11, q1 - vshr.s16 q12, q0, #15 ; sz2 - vshr.s16 q13, q1, #15 - - ;modify data to have its original sign - veor.s16 q4, q2 ; y^sz - veor.s16 q5, q3 - - vadd.s16 q10, q6 ; x2 + Round - vadd.s16 q11, q7 - - ldr r8, [r2, #vp8_blockd_dequant] - - vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16 - vqdmulh.s16 q11, q9 - - vshr.s16 q4, #1 ; right shift 1 after vqdmulh - vshr.s16 q5, #1 - - vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i] - - vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) - vsub.s16 q5, q3 - - vshr.s16 q10, #1 ; right shift 1 after vqdmulh - vshr.s16 q11, #1 - - ldr r9, [r2, #vp8_blockd_dqcoeff] - - veor.s16 q10, q12 ; y2^sz2 - veor.s16 q11, q13 - - vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1 - - - vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement) - vsub.s16 q11, q13 - - ldr r6, [r3, #vp8_blockd_qcoeff] - - vmul.s16 q2, q6, q4 ; x * Dequant - vmul.s16 q3, q7, q5 - - ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table - - vceq.s16 q8, q8 ; set q8 to all 1 - - vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2 - - vmul.s16 q12, q6, q10 ; x2 * Dequant - vmul.s16 q13, q7, q11 - - vld1.16 {q6, q7}, [r0@128] ; load inverse scan order - - vtst.16 q14, q4, q8 ; now find eob - vtst.16 q15, q5, q8 ; non-zero element is set to all 1 - - vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant - - ldr r7, [r3, #vp8_blockd_dqcoeff] - - vand q0, q6, q14 ; get all valid numbers from scan array - vand q1, q7, q15 - - vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant - - vtst.16 q2, q10, q8 ; now find eob - vtst.16 q3, q11, q8 ; non-zero element is set to all 1 - - vmax.u16 q0, q0, q1 ; find maximum value in q0, q1 - - vand q10, q6, q2 ; get all valid numbers from scan array - vand q11, q7, q3 - vmax.u16 q10, q10, q11 ; find maximum value in q10, q11 - - vmax.u16 d0, d0, d1 - vmax.u16 d20, d20, d21 - vmovl.u16 q0, d0 - vmovl.u16 q10, d20 - - - vmax.u32 d0, d0, d1 - vmax.u32 d20, d20, d21 - vpmax.u32 d0, d0, d0 - vpmax.u32 d20, d20, d20 - - add r4, r2, #vp8_blockd_eob - add r5, r3, #vp8_blockd_eob - - vst1.32 {d0[0]}, [r4@32] - vst1.32 {d20[0]}, [r5@32] - - vldmia sp!, {q4-q7} - ldmfd sp!, {r4-r9} - bx lr - - ENDP - -;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) -|vp8_fast_quantize_b_neon| PROC - - stmfd sp!, {r4-r7} - - ldr r3, [r0, #vp8_block_coeff] - ldr r4, [r0, #vp8_block_quant_fast] - ldr r5, [r0, #vp8_block_round] - - vld1.16 {q0, q1}, [r3@128] ; load z - 
vorr.s16 q14, q0, q1 ; check if all zero (step 1) - ldr r6, [r1, #vp8_blockd_qcoeff] - ldr r7, [r1, #vp8_blockd_dqcoeff] - vorr.s16 d28, d28, d29 ; check if all zero (step 2) - - vabs.s16 q12, q0 ; calculate x = abs(z) - vabs.s16 q13, q1 - - ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative - vshr.s16 q2, q0, #15 ; sz - vmov r2, r3, d28 ; check if all zero (step 3) - vshr.s16 q3, q1, #15 - - vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15] - vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15] - - vadd.s16 q12, q14 ; x + Round - vadd.s16 q13, q15 - - ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table - - vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16 - vqdmulh.s16 q13, q9 - - vld1.16 {q10, q11}, [r0@128]; load inverse scan order - - vceq.s16 q8, q8 ; set q8 to all 1 - - ldr r4, [r1, #vp8_blockd_dequant] - - vshr.s16 q12, #1 ; right shift 1 after vqdmulh - vshr.s16 q13, #1 - - orr r2, r2, r3 ; check if all zero (step 4) - cmp r2, #0 ; check if all zero (step 5) - beq zero_output ; check if all zero (step 6) - - ;modify data to have its original sign - veor.s16 q12, q2 ; y^sz - veor.s16 q13, q3 - - vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) - vsub.s16 q13, q3 - - vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i] - - vtst.16 q14, q12, q8 ; now find eob - vtst.16 q15, q13, q8 ; non-zero element is set to all 1 - - vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1 - - vand q10, q10, q14 ; get all valid numbers from scan array - vand q11, q11, q15 - - - vmax.u16 q0, q10, q11 ; find maximum value in q0, q1 - vmax.u16 d0, d0, d1 - vmovl.u16 q0, d0 - - vmul.s16 q2, q12 ; x * Dequant - vmul.s16 q3, q13 - - vmax.u32 d0, d0, d1 - vpmax.u32 d0, d0, d0 - - vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant - - add r4, r1, #vp8_blockd_eob - vst1.32 {d0[0]}, [r4@32] - - ldmfd sp!, {r4-r7} - bx lr - -zero_output - str r2, [r1, #vp8_blockd_eob] - vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 - vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0 - - ldmfd sp!, {r4-r7} - bx lr - - ENDP - -; default inverse zigzag table is defined in vp9/common/vp9_entropy.c -_inv_zig_zag_ - DCD inv_zig_zag - - ALIGN 16 ; enable use of @128 bit aligned loads -inv_zig_zag - DCW 0x0001, 0x0002, 0x0006, 0x0007 - DCW 0x0003, 0x0005, 0x0008, 0x000d - DCW 0x0004, 0x0009, 0x000c, 0x000e - DCW 0x000a, 0x000b, 0x000f, 0x0010 - - END - diff --git a/vp9/encoder/arm/neon/vp9_memcpy_neon.asm b/vp9/encoder/arm/neon/vp9_memcpy_neon.asm deleted file mode 100644 index b0450e523..000000000 --- a/vp9/encoder/arm/neon/vp9_memcpy_neon.asm +++ /dev/null @@ -1,68 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_memcpy_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;========================================= -;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz); -|vp8_memcpy_neon| PROC - ;pld [r1] ;preload pred data - ;pld [r1, #128] - ;pld [r1, #256] - ;pld [r1, #384] - - mov r12, r2, lsr #8 ;copy 256 bytes data at one time - -memcpy_neon_loop - vld1.8 {q0, q1}, [r1]! ;load src data - subs r12, r12, #1 - vld1.8 {q2, q3}, [r1]! - vst1.8 {q0, q1}, [r0]! 
;copy to dst_ptr - vld1.8 {q4, q5}, [r1]! - vst1.8 {q2, q3}, [r0]! - vld1.8 {q6, q7}, [r1]! - vst1.8 {q4, q5}, [r0]! - vld1.8 {q8, q9}, [r1]! - vst1.8 {q6, q7}, [r0]! - vld1.8 {q10, q11}, [r1]! - vst1.8 {q8, q9}, [r0]! - vld1.8 {q12, q13}, [r1]! - vst1.8 {q10, q11}, [r0]! - vld1.8 {q14, q15}, [r1]! - vst1.8 {q12, q13}, [r0]! - vst1.8 {q14, q15}, [r0]! - - ;pld [r1] ;preload pred data -- need to adjust for real device - ;pld [r1, #128] - ;pld [r1, #256] - ;pld [r1, #384] - - bne memcpy_neon_loop - - ands r3, r2, #0xff ;extra copy - beq done_copy_neon_loop - -extra_copy_neon_loop - vld1.8 {q0}, [r1]! ;load src data - subs r3, r3, #16 - vst1.8 {q0}, [r0]! - bne extra_copy_neon_loop - -done_copy_neon_loop - bx lr - ENDP - - END diff --git a/vp9/encoder/arm/neon/vp9_mse16x16_neon.asm b/vp9/encoder/arm/neon/vp9_mse16x16_neon.asm deleted file mode 100644 index 4d1512d40..000000000 --- a/vp9/encoder/arm/neon/vp9_mse16x16_neon.asm +++ /dev/null @@ -1,116 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_mse16x16_neon| - EXPORT |vp8_get4x4sse_cs_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;============================ -; r0 unsigned char *src_ptr -; r1 int source_stride -; r2 unsigned char *ref_ptr -; r3 int recon_stride -; stack unsigned int *sse -;note: in this function, sum is never used. So, we can remove this part of calculation -;from vp9_variance(). 
- -|vp8_mse16x16_neon| PROC - vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse - vmov.i8 q8, #0 - vmov.i8 q9, #0 - vmov.i8 q10, #0 - - mov r12, #8 - -mse16x16_neon_loop - vld1.8 {q0}, [r0], r1 ;Load up source and reference - vld1.8 {q2}, [r2], r3 - vld1.8 {q1}, [r0], r1 - vld1.8 {q3}, [r2], r3 - - vsubl.u8 q11, d0, d4 - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vmlal.s16 q7, d22, d22 - vmlal.s16 q8, d23, d23 - - subs r12, r12, #1 - - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vmlal.s16 q7, d26, d26 - vmlal.s16 q8, d27, d27 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne mse16x16_neon_loop - - vadd.u32 q7, q7, q8 - vadd.u32 q9, q9, q10 - - ldr r12, [sp] ;load *sse from stack - - vadd.u32 q10, q7, q9 - vpaddl.u32 q1, q10 - vadd.u64 d0, d2, d3 - - vst1.32 {d0[0]}, [r12] - vmov.32 r0, d0[0] - - bx lr - - ENDP - - -;============================= -; r0 unsigned char *src_ptr, -; r1 int source_stride, -; r2 unsigned char *ref_ptr, -; r3 int recon_stride -|vp8_get4x4sse_cs_neon| PROC - vld1.8 {d0}, [r0], r1 ;Load up source and reference - vld1.8 {d4}, [r2], r3 - vld1.8 {d1}, [r0], r1 - vld1.8 {d5}, [r2], r3 - vld1.8 {d2}, [r0], r1 - vld1.8 {d6}, [r2], r3 - vld1.8 {d3}, [r0], r1 - vld1.8 {d7}, [r2], r3 - - vsubl.u8 q11, d0, d4 - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vmull.s16 q7, d22, d22 - vmull.s16 q8, d24, d24 - vmull.s16 q9, d26, d26 - vmull.s16 q10, d28, d28 - - vadd.u32 q7, q7, q8 - vadd.u32 q9, q9, q10 - vadd.u32 q9, q7, q9 - - vpaddl.u32 q1, q9 - vadd.u64 d0, d2, d3 - - vmov.32 r0, d0[0] - bx lr - - ENDP - - END diff --git a/vp9/encoder/arm/neon/vp9_picklpf_arm.c b/vp9/encoder/arm/neon/vp9_picklpf_arm.c deleted file mode 100644 index b427e5ef7..000000000 --- a/vp9/encoder/arm/neon/vp9_picklpf_arm.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
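As the note inside vp9_mse16x16_neon.asm above says, the sum term is never used for MSE: the routine returns the raw sum of squared differences, so it equals the corresponding variance plus sum*sum/256. A scalar sketch of the same reduction:

    /* Scalar equivalent of vp8_mse16x16_neon: SSE only, no mean correction. */
    static unsigned int mse16x16_sketch(const unsigned char *src, int src_stride,
                                        const unsigned char *ref, int ref_stride,
                                        unsigned int *sse) {
      unsigned int sq = 0;
      for (int i = 0; i < 16; i++) {
        for (int j = 0; j < 16; j++) {
          int d = src[j] - ref[j];
          sq += (unsigned int)(d * d);
        }
        src += src_stride;
        ref += ref_stride;
      }
      *sse = sq;  /* stored through the stack argument */
      return sq;  /* and also returned in r0           */
    }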
- */ - - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpxscale.h" -#include "vp9/common/vp9_alloccommon.h" - -extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz); - - -void -vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { - unsigned char *src_y, *dst_y; - int yheight; - int ystride; - int border; - int yoffset; - int linestocopy; - - border = src_ybc->border; - yheight = src_ybc->y_height; - ystride = src_ybc->y_stride; - - linestocopy = (yheight >> (Fraction + 4)); - - if (linestocopy < 1) - linestocopy = 1; - - linestocopy <<= 4; - - yoffset = ystride * ((yheight >> 5) * 16 - 8); - src_y = src_ybc->y_buffer + yoffset; - dst_y = dst_ybc->y_buffer + yoffset; - - // vpx_memcpy (dst_y, src_y, ystride * (linestocopy +16)); - vp8_memcpy_neon((unsigned char *)dst_y, (unsigned char *)src_y, (int)(ystride * (linestocopy + 16))); -} diff --git a/vp9/encoder/arm/neon/vp9_sad16_neon.asm b/vp9/encoder/arm/neon/vp9_sad16_neon.asm deleted file mode 100644 index d7c590e15..000000000 --- a/vp9/encoder/arm/neon/vp9_sad16_neon.asm +++ /dev/null @@ -1,207 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_sad16x16_neon| - EXPORT |vp8_sad16x8_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 unsigned char *src_ptr -; r1 int src_stride -; r2 unsigned char *ref_ptr -; r3 int ref_stride -|vp8_sad16x16_neon| PROC -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabdl.u8 q12, d0, d8 - vabdl.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - -;; - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0] - vld1.8 {q7}, [r2] - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vadd.u16 
q0, q12, q13 - - vpaddl.u16 q1, q0 - vpaddl.u32 q0, q1 - - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - -;============================== -;unsigned int vp8_sad16x8_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -|vp8_sad16x8_neon| PROC - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabdl.u8 q12, d0, d8 - vabdl.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - - vld1.8 {q0}, [r0], r1 - vld1.8 {q4}, [r2], r3 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vld1.8 {q1}, [r0], r1 - vld1.8 {q5}, [r2], r3 - - vabal.u8 q12, d0, d8 - vabal.u8 q13, d1, d9 - - vld1.8 {q2}, [r0], r1 - vld1.8 {q6}, [r2], r3 - - vabal.u8 q12, d2, d10 - vabal.u8 q13, d3, d11 - - vld1.8 {q3}, [r0], r1 - vld1.8 {q7}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q13, d5, d13 - - vabal.u8 q12, d6, d14 - vabal.u8 q13, d7, d15 - - vadd.u16 q0, q12, q13 - - vpaddl.u16 q1, q0 - vpaddl.u32 q0, q1 - - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - - END diff --git a/vp9/encoder/arm/neon/vp9_sad8_neon.asm b/vp9/encoder/arm/neon/vp9_sad8_neon.asm deleted file mode 100644 index 23ba6df93..000000000 --- a/vp9/encoder/arm/neon/vp9_sad8_neon.asm +++ /dev/null @@ -1,209 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
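The SAD kernels below, like the 16-wide ones above, are straight widening absolute-difference reductions: vabdl starts a 16-bit accumulator from the first row of |src - ref| and vabal folds each following row in, with a pairwise-add tree (vpaddl/vadd) collapsing the lanes at the end. A scalar sketch of the generic form the per-size routines specialize (the width/height parameterization is mine, not the asm's):

    static unsigned int sad_sketch(const unsigned char *src, int src_stride,
                                   const unsigned char *ref, int ref_stride,
                                   int width, int height) {
      unsigned int sad = 0;
      for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
          int d = src[j] - ref[j];
          sad += (unsigned int)(d < 0 ? -d : d);  /* vabdl/vabal lane-wise */
        }
        src += src_stride;
        ref += ref_stride;
      }
      return sad;
    }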
-; - - - EXPORT |vp8_sad8x8_neon| - EXPORT |vp8_sad8x16_neon| - EXPORT |vp8_sad4x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; unsigned int vp8_sad8x8_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad8x8_neon| PROC - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 q1, q12 - vpaddl.u32 q0, q1 - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - -;============================ -;unsigned int vp8_sad8x16_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad8x16_neon| PROC - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vabal.u8 q12, d6, d14 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabal.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 q1, q12 - vpaddl.u32 q0, q1 - vadd.u32 d0, d0, d1 - - vmov.32 r0, d0[0] - - bx lr - - ENDP - -;=========================== -;unsigned int vp8_sad4x4_c( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) - -|vp8_sad4x4_neon| PROC - vld1.8 {d0}, [r0], r1 - vld1.8 {d8}, [r2], r3 - - vld1.8 {d2}, [r0], r1 - vld1.8 {d10}, [r2], r3 - - vabdl.u8 q12, d0, d8 - - vld1.8 {d4}, [r0], r1 - vld1.8 {d12}, [r2], r3 - - vabal.u8 q12, d2, d10 - - vld1.8 {d6}, [r0], r1 - vld1.8 {d14}, [r2], r3 - - vabal.u8 q12, d4, d12 - vabal.u8 q12, d6, d14 - - vpaddl.u16 d1, d24 - vpaddl.u32 d0, d1 - vmov.32 r0, d0[0] - - bx lr - - ENDP - - END diff --git a/vp9/encoder/arm/neon/vp9_shortfdct_neon.asm b/vp9/encoder/arm/neon/vp9_shortfdct_neon.asm deleted file mode 100644 index 09dd011ec..000000000 --- a/vp9/encoder/arm/neon/vp9_shortfdct_neon.asm +++ /dev/null @@ -1,221 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_short_fdct4x4_neon| - EXPORT |vp8_short_fdct8x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=4 - - - ALIGN 16 ; enable use of @128 bit aligned loads -coeff - DCW 5352, 5352, 5352, 5352 - DCW 2217, 2217, 2217, 2217 - DCD 14500, 14500, 14500, 14500 - DCD 7500, 7500, 7500, 7500 - DCD 12000, 12000, 12000, 12000 - DCD 51000, 51000, 51000, 51000 - -;void vp8_short_fdct4x4_c(short *input, short *output, int pitch) -|vp8_short_fdct4x4_neon| PROC - - ; Part one - vld1.16 {d0}, [r0@64], r2 - adr r12, coeff - vld1.16 {d1}, [r0@64], r2 - vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217 - vld1.16 {d2}, [r0@64], r2 - vld1.32 {q9, q10}, [r12@128]! ; q9=14500, q10=7500 - vld1.16 {d3}, [r0@64], r2 - - ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vld1.32 {q11,q12}, [r12@128] ; q11=12000, q12=51000 - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[3] - vadd.s16 d5, d1, d2 ; b1 = ip[1] + ip[2] - vsub.s16 d6, d1, d2 ; c1 = ip[1] - ip[2] - vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[3] - - vshl.s16 q2, q2, #3 ; (a1, b1) << 3 - vshl.s16 q3, q3, #3 ; (c1, d1) << 3 - - vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 - vsub.s16 d2, d4, d5 ; op[2] = a1 - b1 - - vmlal.s16 q9, d7, d16 ; d1*5352 + 14500 - vmlal.s16 q10, d7, d17 ; d1*2217 + 7500 - vmlal.s16 q9, d6, d17 ; c1*2217 + d1*5352 + 14500 - vmlsl.s16 q10, d6, d16 ; d1*2217 - c1*5352 + 7500 - - vshrn.s32 d1, q9, #12 ; op[1] = (c1*2217 + d1*5352 + 14500)>>12 - vshrn.s32 d3, q10, #12 ; op[3] = (d1*2217 - c1*5352 + 7500)>>12 - - - ; Part two - - ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vmov.s16 d26, #7 - - vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[12] - vadd.s16 d5, d1, d2 ; b1 = ip[4] + ip[8] - vsub.s16 d6, d1, d2 ; c1 = ip[4] - ip[8] - vadd.s16 d4, d4, d26 ; a1 + 7 - vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[12] - - vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 + 7 - vsub.s16 d2, d4, d5 ; op[8] = a1 - b1 + 7 - - vmlal.s16 q11, d7, d16 ; d1*5352 + 12000 - vmlal.s16 q12, d7, d17 ; d1*2217 + 51000 - - vceq.s16 d4, d7, #0 - - vshr.s16 d0, d0, #4 - vshr.s16 d2, d2, #4 - - vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000 - vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000 - - vmvn.s16 d4, d4 - vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16 - vsub.s16 d1, d1, d4 ; op[4] += (d1!=0) - vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16 - - vst1.16 {q0, q1}, [r1@128] - - bx lr - - ENDP - -;void vp8_short_fdct8x4_c(short *input, short *output, int pitch) -|vp8_short_fdct8x4_neon| PROC - - ; Part one - - vld1.16 {q0}, [r0@128], r2 - adr r12, coeff - vld1.16 {q1}, [r0@128], r2 - vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217 - vld1.16 {q2}, [r0@128], r2 - vld1.32 {q9, q10}, [r12@128]! 
; q9=14500, q10=7500 - vld1.16 {q3}, [r0@128], r2 - - ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3] - vtrn.32 q0, q2 ; [A0|B0] - vtrn.32 q1, q3 ; [A1|B1] - vtrn.16 q0, q1 ; [A2|B2] - vtrn.16 q2, q3 ; [A3|B3] - - vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[3] - vadd.s16 q12, q1, q2 ; b1 = ip[1] + ip[2] - vsub.s16 q13, q1, q2 ; c1 = ip[1] - ip[2] - vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[3] - - vshl.s16 q11, q11, #3 ; a1 << 3 - vshl.s16 q12, q12, #3 ; b1 << 3 - vshl.s16 q13, q13, #3 ; c1 << 3 - vshl.s16 q14, q14, #3 ; d1 << 3 - - vadd.s16 q0, q11, q12 ; [A0 | B0] = a1 + b1 - vsub.s16 q2, q11, q12 ; [A2 | B2] = a1 - b1 - - vmov.s16 q11, q9 ; 14500 - vmov.s16 q12, q10 ; 7500 - - vmlal.s16 q9, d28, d16 ; A[1] = d1*5352 + 14500 - vmlal.s16 q10, d28, d17 ; A[3] = d1*2217 + 7500 - vmlal.s16 q11, d29, d16 ; B[1] = d1*5352 + 14500 - vmlal.s16 q12, d29, d17 ; B[3] = d1*2217 + 7500 - - vmlal.s16 q9, d26, d17 ; A[1] = c1*2217 + d1*5352 + 14500 - vmlsl.s16 q10, d26, d16 ; A[3] = d1*2217 - c1*5352 + 7500 - vmlal.s16 q11, d27, d17 ; B[1] = c1*2217 + d1*5352 + 14500 - vmlsl.s16 q12, d27, d16 ; B[3] = d1*2217 - c1*5352 + 7500 - - vshrn.s32 d2, q9, #12 ; A[1] = (c1*2217 + d1*5352 + 14500)>>12 - vshrn.s32 d6, q10, #12 ; A[3] = (d1*2217 - c1*5352 + 7500)>>12 - vshrn.s32 d3, q11, #12 ; B[1] = (c1*2217 + d1*5352 + 14500)>>12 - vshrn.s32 d7, q12, #12 ; B[3] = (d1*2217 - c1*5352 + 7500)>>12 - - - ; Part two - vld1.32 {q9,q10}, [r12@128] ; q9=12000, q10=51000 - - ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12] - vtrn.32 q0, q2 ; q0=[A0 | B0] - vtrn.32 q1, q3 ; q1=[A4 | B4] - vtrn.16 q0, q1 ; q2=[A8 | B8] - vtrn.16 q2, q3 ; q3=[A12|B12] - - vmov.s16 q15, #7 - - vadd.s16 q11, q0, q3 ; a1 = ip[0] + ip[12] - vadd.s16 q12, q1, q2 ; b1 = ip[4] + ip[8] - vadd.s16 q11, q11, q15 ; a1 + 7 - vsub.s16 q13, q1, q2 ; c1 = ip[4] - ip[8] - vsub.s16 q14, q0, q3 ; d1 = ip[0] - ip[12] - - vadd.s16 q0, q11, q12 ; a1 + b1 + 7 - vsub.s16 q1, q11, q12 ; a1 - b1 + 7 - - vmov.s16 q11, q9 ; 12000 - vmov.s16 q12, q10 ; 51000 - - vshr.s16 d0, d0, #4 ; A[0] = (a1 + b1 + 7)>>4 - vshr.s16 d4, d1, #4 ; B[0] = (a1 + b1 + 7)>>4 - vshr.s16 d2, d2, #4 ; A[8] = (a1 + b1 + 7)>>4 - vshr.s16 d6, d3, #4 ; B[8] = (a1 + b1 + 7)>>4 - - - vmlal.s16 q9, d28, d16 ; A[4] = d1*5352 + 12000 - vmlal.s16 q10, d28, d17 ; A[12] = d1*2217 + 51000 - vmlal.s16 q11, d29, d16 ; B[4] = d1*5352 + 12000 - vmlal.s16 q12, d29, d17 ; B[12] = d1*2217 + 51000 - - vceq.s16 q14, q14, #0 - - vmlal.s16 q9, d26, d17 ; A[4] = c1*2217 + d1*5352 + 12000 - vmlsl.s16 q10, d26, d16 ; A[12] = d1*2217 - c1*5352 + 51000 - vmlal.s16 q11, d27, d17 ; B[4] = c1*2217 + d1*5352 + 12000 - vmlsl.s16 q12, d27, d16 ; B[12] = d1*2217 - c1*5352 + 51000 - - vmvn.s16 q14, q14 - - vshrn.s32 d1, q9, #16 ; A[4] = (c1*2217 + d1*5352 + 12000)>>16 - vshrn.s32 d3, q10, #16 ; A[12]= (d1*2217 - c1*5352 + 51000)>>16 - vsub.s16 d1, d1, d28 ; A[4] += (d1!=0) - - vshrn.s32 d5, q11, #16 ; B[4] = (c1*2217 + d1*5352 + 12000)>>16 - vshrn.s32 d7, q12, #16 ; B[12]= (d1*2217 - c1*5352 + 51000)>>16 - vsub.s16 d5, d5, d29 ; B[4] += (d1!=0) - - vst1.16 {q0, q1}, [r1@128]! ; block A - vst1.16 {q2, q3}, [r1@128]! ; block B - - bx lr - - ENDP - - END - diff --git a/vp9/encoder/arm/neon/vp9_shortwalsh4x4_neon.asm b/vp9/encoder/arm/neon/vp9_shortwalsh4x4_neon.asm deleted file mode 100644 index 22266297a..000000000 --- a/vp9/encoder/arm/neon/vp9_shortwalsh4x4_neon.asm +++ /dev/null @@ -1,103 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
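The forward 4x4 DCT removed above works in two passes over a transposed block: pass one scales the butterflies by <<3 and rounds the odd outputs with the 14500/7500 biases at 12 fractional bits; pass two adds 7 before the >>4 on the even outputs, uses the 12000/51000 biases at 16 fractional bits on the odd ones, and bumps op[4] by one when d1 is nonzero (the vceq/vmvn/vsub sequence). The constants 5352 and 2217 are VP8's fixed-point cosine approximations. A scalar sketch reproducing the arithmetic from the asm comments, not the libvpx C file verbatim:

    static void short_fdct4x4_sketch(const short *input, short *output, int pitch) {
      short block[16];
      short *op = block;
      const short *ip = input;
      for (int i = 0; i < 4; i++) {                   /* pass one: rows    */
        int a1 = (ip[0] + ip[3]) << 3;
        int b1 = (ip[1] + ip[2]) << 3;
        int c1 = (ip[1] - ip[2]) << 3;
        int d1 = (ip[0] - ip[3]) << 3;
        op[0] = (short)(a1 + b1);
        op[2] = (short)(a1 - b1);
        op[1] = (short)((c1 * 2217 + d1 * 5352 + 14500) >> 12);
        op[3] = (short)((d1 * 2217 - c1 * 5352 + 7500) >> 12);
        ip += pitch / 2;                              /* pitch is in bytes */
        op += 4;
      }
      ip = block;
      op = output;
      for (int i = 0; i < 4; i++) {                   /* pass two: columns */
        int a1 = ip[0] + ip[12];
        int b1 = ip[4] + ip[8];
        int c1 = ip[4] - ip[8];
        int d1 = ip[0] - ip[12];
        op[0] = (short)((a1 + b1 + 7) >> 4);
        op[8] = (short)((a1 - b1 + 7) >> 4);
        op[4] = (short)(((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0));
        op[12] = (short)((d1 * 2217 - c1 * 5352 + 51000) >> 16);
        ip++;
        op++;
      }
    }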
-; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_short_walsh4x4_neon| - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_short_walsh4x4_neon(short *input, short *output, int pitch) -; r0 short *input, -; r1 short *output, -; r2 int pitch -|vp8_short_walsh4x4_neon| PROC - - vld1.16 {d0}, [r0@64], r2 ; load input - vld1.16 {d1}, [r0@64], r2 - vld1.16 {d2}, [r0@64], r2 - vld1.16 {d3}, [r0@64] - - ;First for-loop - ;transpose d0, d1, d2, d3. Then, d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] - vtrn.32 d0, d2 - vtrn.32 d1, d3 - - vmov.s32 q15, #3 ; add 3 to all values - - vtrn.16 d0, d1 - vtrn.16 d2, d3 - - vadd.s16 d4, d0, d2 ; ip[0] + ip[2] - vadd.s16 d5, d1, d3 ; ip[1] + ip[3] - vsub.s16 d6, d1, d3 ; ip[1] - ip[3] - vsub.s16 d7, d0, d2 ; ip[0] - ip[2] - - vshl.s16 d4, d4, #2 ; a1 = (ip[0] + ip[2]) << 2 - vshl.s16 d5, d5, #2 ; d1 = (ip[1] + ip[3]) << 2 - vshl.s16 d6, d6, #2 ; c1 = (ip[1] - ip[3]) << 2 - vceq.s16 d16, d4, #0 ; a1 == 0 - vshl.s16 d7, d7, #2 ; b1 = (ip[0] - ip[2]) << 2 - - vadd.s16 d0, d4, d5 ; a1 + d1 - vmvn d16, d16 ; a1 != 0 - vsub.s16 d3, d4, d5 ; op[3] = a1 - d1 - vadd.s16 d1, d7, d6 ; op[1] = b1 + c1 - vsub.s16 d2, d7, d6 ; op[2] = b1 - c1 - vsub.s16 d0, d0, d16 ; op[0] = a1 + d1 + (a1 != 0) - - ;Second for-loop - ;transpose d0, d1, d2, d3, Then, d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] - vtrn.32 d1, d3 - vtrn.32 d0, d2 - vtrn.16 d2, d3 - vtrn.16 d0, d1 - - vaddl.s16 q8, d0, d2 ; a1 = ip[0]+ip[8] - vaddl.s16 q9, d1, d3 ; d1 = ip[4]+ip[12] - vsubl.s16 q10, d1, d3 ; c1 = ip[4]-ip[12] - vsubl.s16 q11, d0, d2 ; b1 = ip[0]-ip[8] - - vadd.s32 q0, q8, q9 ; a2 = a1 + d1 - vadd.s32 q1, q11, q10 ; b2 = b1 + c1 - vsub.s32 q2, q11, q10 ; c2 = b1 - c1 - vsub.s32 q3, q8, q9 ; d2 = a1 - d1 - - vclt.s32 q8, q0, #0 - vclt.s32 q9, q1, #0 - vclt.s32 q10, q2, #0 - vclt.s32 q11, q3, #0 - - ; subtract -1 (or 0) - vsub.s32 q0, q0, q8 ; a2 += a2 < 0 - vsub.s32 q1, q1, q9 ; b2 += b2 < 0 - vsub.s32 q2, q2, q10 ; c2 += c2 < 0 - vsub.s32 q3, q3, q11 ; d2 += d2 < 0 - - vadd.s32 q8, q0, q15 ; a2 + 3 - vadd.s32 q9, q1, q15 ; b2 + 3 - vadd.s32 q10, q2, q15 ; c2 + 3 - vadd.s32 q11, q3, q15 ; d2 + 3 - - ; vrshrn? would add 1 << 3-1 = 2 - vshrn.s32 d0, q8, #3 - vshrn.s32 d1, q9, #3 - vshrn.s32 d2, q10, #3 - vshrn.s32 d3, q11, #3 - - vst1.16 {q0, q1}, [r1@128] - - bx lr - - ENDP - - END diff --git a/vp9/encoder/arm/neon/vp9_subpixelvariance16x16_neon.asm b/vp9/encoder/arm/neon/vp9_subpixelvariance16x16_neon.asm deleted file mode 100644 index 8bb0734d1..000000000 --- a/vp9/encoder/arm/neon/vp9_subpixelvariance16x16_neon.asm +++ /dev/null @@ -1,425 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
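vp9_sub_pixel_variance16x16_neon_func, deleted below, composes a bilinear filter with the variance reduction: when xoffset is nonzero a first pass filters 17 rows horizontally into on-stack storage (hence "sub sp, sp, #272": 17 rows of 16 bytes, one extra row for the vertical pass), a second pass filters vertically when yoffset is nonzero, and the result is differenced against dst as usual. Both stages use 7-bit taps and the rounded >>7 of vqrshrn. A scalar sketch of one filter stage; the two-entry tap-table shape is assumed from the "lsl #3" indexing, not stated in the asm:

    /* One bilinear filter stage, as in the first pass of the deleted routine.
     * taps is a 7-bit pair summing to 128; output_height is 17 for the
     * horizontal pass of a 16x16 block. pixel_step is 1 (horizontal)
     * or the row width (vertical). */
    static void bilinear_pass_sketch(const unsigned char *src, int src_stride,
                                     unsigned char *dst, int dst_width,
                                     int output_height, int pixel_step,
                                     const unsigned char taps[2]) {
      for (int i = 0; i < output_height; i++) {
        for (int j = 0; j < dst_width; j++) {
          unsigned int v = src[j] * taps[0] + src[j + pixel_step] * taps[1];
          dst[j] = (unsigned char)((v + 64) >> 7);  /* vqrshrn.u16 #7 */
        }
        src += src_stride;
        dst += dst_width;
      }
    }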
-; - - - EXPORT |vp9_sub_pixel_variance16x16_neon_func| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack(r4) unsigned char *dst_ptr, -; stack(r5) int dst_pixels_per_line, -; stack(r6) unsigned int *sse -;note: most of the code is copied from bilinear_predict16x16_neon and vp9_variance16x16_neon. - -|vp9_sub_pixel_variance16x16_neon_func| PROC - push {r4-r6, lr} - - ldr r12, _BilinearTaps_coeff_ - ldr r4, [sp, #16] ;load *dst_ptr from stack - ldr r5, [sp, #20] ;load dst_pixels_per_line from stack - ldr r6, [sp, #24] ;load *sse from stack - - cmp r2, #0 ;skip first_pass filter if xoffset=0 - beq secondpass_bfilter16x16_only - - add r2, r12, r2, lsl #3 ;calculate filter location - - cmp r3, #0 ;skip second_pass filter if yoffset=0 - - vld1.s32 {d31}, [r2] ;load first_pass filter - - beq firstpass_bfilter16x16_only - - sub sp, sp, #272 ;reserve space on stack for temporary storage - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - mov lr, sp - vld1.u8 {d5, d6, d7}, [r0], r1 - - mov r2, #3 ;loop counter - vld1.u8 {d8, d9, d10}, [r0], r1 - - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vld1.u8 {d11, d12, d13}, [r0], r1 - - vdup.8 d1, d31[4] - -;First Pass: output_height lines x output_width columns (17x16) -vp8e_filt_blk2d_fp16x16_loop_neon - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q8, d3, d0 - vmull.u8 q9, d5, d0 - vmull.u8 q10, d6, d0 - vmull.u8 q11, d8, d0 - vmull.u8 q12, d9, d0 - vmull.u8 q13, d11, d0 - vmull.u8 q14, d12, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - vext.8 d11, d11, d12, #1 - - vmlal.u8 q7, d2, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q9, d5, d1 - vmlal.u8 q11, d8, d1 - vmlal.u8 q13, d11, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - vext.8 d12, d12, d13, #1 - - vmlal.u8 q8, d3, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q10, d6, d1 - vmlal.u8 q12, d9, d1 - vmlal.u8 q14, d12, d1 - - subs r2, r2, #1 - - vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d15, q8, #7 - vqrshrn.u16 d16, q9, #7 - vqrshrn.u16 d17, q10, #7 - vqrshrn.u16 d18, q11, #7 - vqrshrn.u16 d19, q12, #7 - vqrshrn.u16 d20, q13, #7 - - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - vqrshrn.u16 d21, q14, #7 - vld1.u8 {d5, d6, d7}, [r0], r1 - - vst1.u8 {d14, d15, d16, d17}, [lr]! ;store result - vld1.u8 {d8, d9, d10}, [r0], r1 - vst1.u8 {d18, d19, d20, d21}, [lr]! 
- vld1.u8 {d11, d12, d13}, [r0], r1 - - bne vp8e_filt_blk2d_fp16x16_loop_neon - -;First-pass filtering for rest 5 lines - vld1.u8 {d14, d15, d16}, [r0], r1 - - vmull.u8 q9, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q10, d3, d0 - vmull.u8 q11, d5, d0 - vmull.u8 q12, d6, d0 - vmull.u8 q13, d8, d0 - vmull.u8 q14, d9, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - - vmlal.u8 q9, d2, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q11, d5, d1 - vmlal.u8 q13, d8, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - - vmlal.u8 q10, d3, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q12, d6, d1 - vmlal.u8 q14, d9, d1 - - vmull.u8 q1, d11, d0 - vmull.u8 q2, d12, d0 - vmull.u8 q3, d14, d0 - vmull.u8 q4, d15, d0 - - vext.8 d11, d11, d12, #1 ;construct src_ptr[1] - vext.8 d14, d14, d15, #1 - - vmlal.u8 q1, d11, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q3, d14, d1 - - vext.8 d12, d12, d13, #1 - vext.8 d15, d15, d16, #1 - - vmlal.u8 q2, d12, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q4, d15, d1 - - vqrshrn.u16 d10, q9, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d11, q10, #7 - vqrshrn.u16 d12, q11, #7 - vqrshrn.u16 d13, q12, #7 - vqrshrn.u16 d14, q13, #7 - vqrshrn.u16 d15, q14, #7 - vqrshrn.u16 d16, q1, #7 - vqrshrn.u16 d17, q2, #7 - vqrshrn.u16 d18, q3, #7 - vqrshrn.u16 d19, q4, #7 - - vst1.u8 {d10, d11, d12, d13}, [lr]! ;store result - vst1.u8 {d14, d15, d16, d17}, [lr]! - vst1.u8 {d18, d19}, [lr]! - -;Second pass: 16x16 -;secondpass_filter - add r3, r12, r3, lsl #3 - sub lr, lr, #272 - - vld1.u32 {d31}, [r3] ;load second_pass filter - - sub sp, sp, #256 - mov r3, sp - - vld1.u8 {d22, d23}, [lr]! ;load src data - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - mov r12, #4 ;loop counter - -vp8e_filt_blk2d_sp16x16_loop_neon - vld1.u8 {d24, d25}, [lr]! - vmull.u8 q1, d22, d0 ;(src_ptr[0] * Filter[0]) - vld1.u8 {d26, d27}, [lr]! - vmull.u8 q2, d23, d0 - vld1.u8 {d28, d29}, [lr]! - vmull.u8 q3, d24, d0 - vld1.u8 {d30, d31}, [lr]! - - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * Filter[1]) - vmlal.u8 q2, d25, d1 - vmlal.u8 q3, d26, d1 - vmlal.u8 q4, d27, d1 - vmlal.u8 q5, d28, d1 - vmlal.u8 q6, d29, d1 - vmlal.u8 q7, d30, d1 - vmlal.u8 q8, d31, d1 - - subs r12, r12, #1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 d9, q8, #7 - - vst1.u8 {d2, d3}, [r3]! ;store result - vst1.u8 {d4, d5}, [r3]! - vst1.u8 {d6, d7}, [r3]! - vmov q11, q15 - vst1.u8 {d8, d9}, [r3]! 
- - bne vp8e_filt_blk2d_sp16x16_loop_neon - - b sub_pixel_variance16x16_neon - -;-------------------- -firstpass_bfilter16x16_only - mov r2, #4 ;loop counter - sub sp, sp, #528 ;reserve space on stack for temporary storage - vdup.8 d0, d31[0] ;first_pass filter (d0 d1) - vdup.8 d1, d31[4] - mov r3, sp - -;First Pass: output_height lines x output_width columns (16x16) -vp8e_filt_blk2d_fpo16x16_loop_neon - vld1.u8 {d2, d3, d4}, [r0], r1 ;load src data - vld1.u8 {d5, d6, d7}, [r0], r1 - vld1.u8 {d8, d9, d10}, [r0], r1 - vld1.u8 {d11, d12, d13}, [r0], r1 - - pld [r0] - pld [r0, r1] - pld [r0, r1, lsl #1] - - vmull.u8 q7, d2, d0 ;(src_ptr[0] * Filter[0]) - vmull.u8 q8, d3, d0 - vmull.u8 q9, d5, d0 - vmull.u8 q10, d6, d0 - vmull.u8 q11, d8, d0 - vmull.u8 q12, d9, d0 - vmull.u8 q13, d11, d0 - vmull.u8 q14, d12, d0 - - vext.8 d2, d2, d3, #1 ;construct src_ptr[1] - vext.8 d5, d5, d6, #1 - vext.8 d8, d8, d9, #1 - vext.8 d11, d11, d12, #1 - - vmlal.u8 q7, d2, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q9, d5, d1 - vmlal.u8 q11, d8, d1 - vmlal.u8 q13, d11, d1 - - vext.8 d3, d3, d4, #1 - vext.8 d6, d6, d7, #1 - vext.8 d9, d9, d10, #1 - vext.8 d12, d12, d13, #1 - - vmlal.u8 q8, d3, d1 ;(src_ptr[0] * Filter[1]) - vmlal.u8 q10, d6, d1 - vmlal.u8 q12, d9, d1 - vmlal.u8 q14, d12, d1 - - subs r2, r2, #1 - - vqrshrn.u16 d14, q7, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d15, q8, #7 - vqrshrn.u16 d16, q9, #7 - vqrshrn.u16 d17, q10, #7 - vqrshrn.u16 d18, q11, #7 - vqrshrn.u16 d19, q12, #7 - vqrshrn.u16 d20, q13, #7 - vst1.u8 {d14, d15}, [r3]! ;store result - vqrshrn.u16 d21, q14, #7 - - vst1.u8 {d16, d17}, [r3]! - vst1.u8 {d18, d19}, [r3]! - vst1.u8 {d20, d21}, [r3]! - - bne vp8e_filt_blk2d_fpo16x16_loop_neon - - b sub_pixel_variance16x16_neon - -;--------------------- -secondpass_bfilter16x16_only -;Second pass: 16x16 -;secondpass_filter - sub sp, sp, #528 ;reserve space on stack for temporary storage - add r3, r12, r3, lsl #3 - mov r12, #4 ;loop counter - vld1.u32 {d31}, [r3] ;load second_pass filter - vld1.u8 {d22, d23}, [r0], r1 ;load src data - mov r3, sp - - vdup.8 d0, d31[0] ;second_pass filter parameters (d0 d1) - vdup.8 d1, d31[4] - -vp8e_filt_blk2d_spo16x16_loop_neon - vld1.u8 {d24, d25}, [r0], r1 - vmull.u8 q1, d22, d0 ;(src_ptr[0] * Filter[0]) - vld1.u8 {d26, d27}, [r0], r1 - vmull.u8 q2, d23, d0 - vld1.u8 {d28, d29}, [r0], r1 - vmull.u8 q3, d24, d0 - vld1.u8 {d30, d31}, [r0], r1 - - vmull.u8 q4, d25, d0 - vmull.u8 q5, d26, d0 - vmull.u8 q6, d27, d0 - vmull.u8 q7, d28, d0 - vmull.u8 q8, d29, d0 - - vmlal.u8 q1, d24, d1 ;(src_ptr[pixel_step] * Filter[1]) - vmlal.u8 q2, d25, d1 - vmlal.u8 q3, d26, d1 - vmlal.u8 q4, d27, d1 - vmlal.u8 q5, d28, d1 - vmlal.u8 q6, d29, d1 - vmlal.u8 q7, d30, d1 - vmlal.u8 q8, d31, d1 - - vqrshrn.u16 d2, q1, #7 ;shift/round/saturate to u8 - vqrshrn.u16 d3, q2, #7 - vqrshrn.u16 d4, q3, #7 - vqrshrn.u16 d5, q4, #7 - vqrshrn.u16 d6, q5, #7 - vqrshrn.u16 d7, q6, #7 - vqrshrn.u16 d8, q7, #7 - vqrshrn.u16 d9, q8, #7 - - vst1.u8 {d2, d3}, [r3]! ;store result - subs r12, r12, #1 - vst1.u8 {d4, d5}, [r3]! - vmov q11, q15 - vst1.u8 {d6, d7}, [r3]! - vst1.u8 {d8, d9}, [r3]! - - bne vp8e_filt_blk2d_spo16x16_loop_neon - - b sub_pixel_variance16x16_neon - -;---------------------------- -;variance16x16 -sub_pixel_variance16x16_neon - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - - sub r3, r3, #256 - mov r12, #8 - -sub_pixel_variance16x16_neon_loop - vld1.8 {q0}, [r3]! ;Load up source and reference - vld1.8 {q2}, [r4], r5 - vld1.8 {q1}, [r3]! 
- vld1.8 {q3}, [r4], r5 - - vsubl.u8 q11, d0, d4 ;diff - vsubl.u8 q12, d1, d5 - vsubl.u8 q13, d2, d6 - vsubl.u8 q14, d3, d7 - - vpadal.s16 q8, q11 ;sum - vmlal.s16 q9, d22, d22 ;sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - bne sub_pixel_variance16x16_neon_loop - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [r6] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - add sp, sp, #528 - vmov.32 r0, d0[0] ;return - - pop {r4-r6,pc} - - ENDP - -;----------------- - -_BilinearTaps_coeff_ - DCD bilinear_taps_coeff -bilinear_taps_coeff - DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112 - - END diff --git a/vp9/encoder/arm/neon/vp9_subpixelvariance16x16s_neon.asm b/vp9/encoder/arm/neon/vp9_subpixelvariance16x16s_neon.asm deleted file mode 100644 index a3faf9a77..000000000 --- a/vp9/encoder/arm/neon/vp9_subpixelvariance16x16s_neon.asm +++ /dev/null @@ -1,572 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp9_variance_halfpixvar16x16_h_neon| - EXPORT |vp9_variance_halfpixvar16x16_v_neon| - EXPORT |vp9_variance_halfpixvar16x16_hv_neon| - EXPORT |vp9_sub_pixel_variance16x16s_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;================================================ -;unsigned int vp9_variance_halfpixvar16x16_h_neon -;( -; unsigned char *src_ptr, r0 -; int src_pixels_per_line, r1 -; unsigned char *dst_ptr, r2 -; int dst_pixels_per_line, r3 -; unsigned int *sse -;); -;================================================ -|vp9_variance_halfpixvar16x16_h_neon| PROC - push {lr} - - mov r12, #4 ;loop counter - ldr lr, [sp, #4] ;load *sse from stack - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - -;First Pass: output_height lines x output_width columns (16x16) -vp8_filt_fpo16x16s_4_0_loop_neon - vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - vld1.8 {q11}, [r2], r3 - vld1.u8 {d4, d5, d6, d7}, [r0], r1 - vld1.8 {q12}, [r2], r3 - vld1.u8 {d8, d9, d10, d11}, [r0], r1 - vld1.8 {q13}, [r2], r3 - vld1.u8 {d12, d13, d14, d15}, [r0], r1 - - ;pld [r0] - ;pld [r0, r1] - ;pld [r0, r1, lsl #1] - - vext.8 q1, q0, q1, #1 ;construct src_ptr[1] - vext.8 q3, q2, q3, #1 - vext.8 q5, q4, q5, #1 - vext.8 q7, q6, q7, #1 - - vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - vld1.8 {q14}, [r2], r3 - vrhadd.u8 q1, q2, q3 - vrhadd.u8 q2, q4, q5 - vrhadd.u8 q3, q6, q7 - - vsubl.u8 q4, d0, d22 ;diff - vsubl.u8 q5, d1, d23 - vsubl.u8 q6, d2, d24 - vsubl.u8 q7, d3, d25 - vsubl.u8 q0, d4, d26 - vsubl.u8 q1, d5, d27 - vsubl.u8 q2, d6, d28 - vsubl.u8 q3, d7, d29 - - vpadal.s16 q8, q4 ;sum - vmlal.s16 q9, d8, d8 ;sse - vmlal.s16 q10, d9, d9 - - subs r12, r12, #1 - - vpadal.s16 q8, q5 - vmlal.s16 q9, d10, d10 - vmlal.s16 q10, d11, d11 - vpadal.s16 q8, q6 - vmlal.s16 q9, d12, d12 - vmlal.s16 
q10, d13, d13 - vpadal.s16 q8, q7 - vmlal.s16 q9, d14, d14 - vmlal.s16 q10, d15, d15 - - vpadal.s16 q8, q0 ;sum - vmlal.s16 q9, d0, d0 ;sse - vmlal.s16 q10, d1, d1 - vpadal.s16 q8, q1 - vmlal.s16 q9, d2, d2 - vmlal.s16 q10, d3, d3 - vpadal.s16 q8, q2 - vmlal.s16 q9, d4, d4 - vmlal.s16 q10, d5, d5 - vpadal.s16 q8, q3 - vmlal.s16 q9, d6, d6 - vmlal.s16 q10, d7, d7 - - bne vp8_filt_fpo16x16s_4_0_loop_neon - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {pc} - ENDP - -;================================================ -;unsigned int vp9_variance_halfpixvar16x16_v_neon -;( -; unsigned char *src_ptr, r0 -; int src_pixels_per_line, r1 -; unsigned char *dst_ptr, r2 -; int dst_pixels_per_line, r3 -; unsigned int *sse -;); -;================================================ -|vp9_variance_halfpixvar16x16_v_neon| PROC - push {lr} - - mov r12, #4 ;loop counter - - vld1.u8 {q0}, [r0], r1 ;load src data - ldr lr, [sp, #4] ;load *sse from stack - - vmov.i8 q8, #0 ;q8 - sum - vmov.i8 q9, #0 ;q9, q10 - sse - vmov.i8 q10, #0 - -vp8_filt_spo16x16s_0_4_loop_neon - vld1.u8 {q2}, [r0], r1 - vld1.8 {q1}, [r2], r3 - vld1.u8 {q4}, [r0], r1 - vld1.8 {q3}, [r2], r3 - vld1.u8 {q6}, [r0], r1 - vld1.8 {q5}, [r2], r3 - vld1.u8 {q15}, [r0], r1 - - vrhadd.u8 q0, q0, q2 - vld1.8 {q7}, [r2], r3 - vrhadd.u8 q2, q2, q4 - vrhadd.u8 q4, q4, q6 - vrhadd.u8 q6, q6, q15 - - vsubl.u8 q11, d0, d2 ;diff - vsubl.u8 q12, d1, d3 - vsubl.u8 q13, d4, d6 - vsubl.u8 q14, d5, d7 - vsubl.u8 q0, d8, d10 - vsubl.u8 q1, d9, d11 - vsubl.u8 q2, d12, d14 - vsubl.u8 q3, d13, d15 - - vpadal.s16 q8, q11 ;sum - vmlal.s16 q9, d22, d22 ;sse - vmlal.s16 q10, d23, d23 - - subs r12, r12, #1 - - vpadal.s16 q8, q12 - vmlal.s16 q9, d24, d24 - vmlal.s16 q10, d25, d25 - vpadal.s16 q8, q13 - vmlal.s16 q9, d26, d26 - vmlal.s16 q10, d27, d27 - vpadal.s16 q8, q14 - vmlal.s16 q9, d28, d28 - vmlal.s16 q10, d29, d29 - - vpadal.s16 q8, q0 ;sum - vmlal.s16 q9, d0, d0 ;sse - vmlal.s16 q10, d1, d1 - vpadal.s16 q8, q1 - vmlal.s16 q9, d2, d2 - vmlal.s16 q10, d3, d3 - vpadal.s16 q8, q2 - vmlal.s16 q9, d4, d4 - vmlal.s16 q10, d5, d5 - - vmov q0, q15 - - vpadal.s16 q8, q3 - vmlal.s16 q9, d6, d6 - vmlal.s16 q10, d7, d7 - - bne vp8_filt_spo16x16s_0_4_loop_neon - - vadd.u32 q10, q9, q10 ;accumulate sse - vpaddl.s32 q0, q8 ;accumulate sum - - vpaddl.u32 q1, q10 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {pc} - ENDP - -;================================================ -;unsigned int vp9_variance_halfpixvar16x16_hv_neon -;( -; unsigned char *src_ptr, r0 -; int src_pixels_per_line, r1 -; unsigned char *dst_ptr, r2 -; int dst_pixels_per_line, r3 -; unsigned int *sse -;); -;================================================ -|vp9_variance_halfpixvar16x16_hv_neon| PROC - push {lr} - - vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data - - ldr lr, [sp, #4] ;load *sse from stack - vmov.i8 q13, #0 ;q8 - sum - vext.8 q1, q0, q1, #1 ;construct src_ptr[1] - - vmov.i8 q14, #0 ;q9, q10 - sse - vmov.i8 q15, #0 - - mov r12, #4 ;loop counter - vrhadd.u8 q0, q0, q1 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - -;First Pass: output_height lines x output_width columns (17x16) -vp8_filt16x16s_4_4_loop_neon - vld1.u8 {d4, d5, d6, 
d7}, [r0], r1 - vld1.u8 {d8, d9, d10, d11}, [r0], r1 - vld1.u8 {d12, d13, d14, d15}, [r0], r1 - vld1.u8 {d16, d17, d18, d19}, [r0], r1 - - ;pld [r0] - ;pld [r0, r1] - ;pld [r0, r1, lsl #1] - - vext.8 q3, q2, q3, #1 ;construct src_ptr[1] - vext.8 q5, q4, q5, #1 - vext.8 q7, q6, q7, #1 - vext.8 q9, q8, q9, #1 - - vrhadd.u8 q1, q2, q3 ;(src_ptr[0]+src_ptr[1])/round/shift right 1 - vrhadd.u8 q2, q4, q5 - vrhadd.u8 q3, q6, q7 - vrhadd.u8 q4, q8, q9 - - vld1.8 {q5}, [r2], r3 - vrhadd.u8 q0, q0, q1 - vld1.8 {q6}, [r2], r3 - vrhadd.u8 q1, q1, q2 - vld1.8 {q7}, [r2], r3 - vrhadd.u8 q2, q2, q3 - vld1.8 {q8}, [r2], r3 - vrhadd.u8 q3, q3, q4 - - vsubl.u8 q9, d0, d10 ;diff - vsubl.u8 q10, d1, d11 - vsubl.u8 q11, d2, d12 - vsubl.u8 q12, d3, d13 - - vsubl.u8 q0, d4, d14 ;diff - vsubl.u8 q1, d5, d15 - vsubl.u8 q5, d6, d16 - vsubl.u8 q6, d7, d17 - - vpadal.s16 q13, q9 ;sum - vmlal.s16 q14, d18, d18 ;sse - vmlal.s16 q15, d19, d19 - - vpadal.s16 q13, q10 ;sum - vmlal.s16 q14, d20, d20 ;sse - vmlal.s16 q15, d21, d21 - - vpadal.s16 q13, q11 ;sum - vmlal.s16 q14, d22, d22 ;sse - vmlal.s16 q15, d23, d23 - - vpadal.s16 q13, q12 ;sum - vmlal.s16 q14, d24, d24 ;sse - vmlal.s16 q15, d25, d25 - - subs r12, r12, #1 - - vpadal.s16 q13, q0 ;sum - vmlal.s16 q14, d0, d0 ;sse - vmlal.s16 q15, d1, d1 - - vpadal.s16 q13, q1 ;sum - vmlal.s16 q14, d2, d2 ;sse - vmlal.s16 q15, d3, d3 - - vpadal.s16 q13, q5 ;sum - vmlal.s16 q14, d10, d10 ;sse - vmlal.s16 q15, d11, d11 - - vmov q0, q4 - - vpadal.s16 q13, q6 ;sum - vmlal.s16 q14, d12, d12 ;sse - vmlal.s16 q15, d13, d13 - - bne vp8_filt16x16s_4_4_loop_neon - - vadd.u32 q15, q14, q15 ;accumulate sse - vpaddl.s32 q0, q13 ;accumulate sum - - vpaddl.u32 q1, q15 - vadd.s64 d0, d0, d1 - vadd.u64 d1, d2, d3 - - vmull.s32 q5, d0, d0 - vst1.32 {d1[0]}, [lr] ;store sse - vshr.s32 d10, d10, #8 - vsub.s32 d0, d1, d10 - - vmov.32 r0, d0[0] ;return - pop {pc} - ENDP - -;============================== -; r0 unsigned char *src_ptr, -; r1 int src_pixels_per_line, -; r2 int xoffset, -; r3 int yoffset, -; stack unsigned char *dst_ptr, -; stack int dst_pixels_per_line, -; stack unsigned int *sse -;note: in vp8_find_best_half_pixel_step()(called when 8common.rtcd.flags; - -#if HAVE_ARMV5TE - if (flags & HAS_EDSP) { - } -#endif - -#if HAVE_ARMV6 - if (flags & HAS_MEDIA) { - cpi->rtcd.variance.sad16x16 = vp9_sad16x16_armv6; - /*cpi->rtcd.variance.sad16x8 = vp9_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp9_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp9_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp9_sad4x4_c;*/ - - /*cpi->rtcd.variance.var4x4 = vp9_variance4x4_c;*/ - cpi->rtcd.variance.var8x8 = vp9_variance8x8_armv6; - /*cpi->rtcd.variance.var8x16 = vp9_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp9_variance16x8_c;*/ - cpi->rtcd.variance.var16x16 = vp9_variance16x16_armv6; - - /*cpi->rtcd.variance.subpixvar4x4 = vp9_sub_pixel_variance4x4_c;*/ - cpi->rtcd.variance.subpixvar8x8 = vp9_sub_pixel_variance8x8_armv6; - /*cpi->rtcd.variance.subpixvar8x16 = vp9_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp9_sub_pixel_variance16x8_c;*/ - cpi->rtcd.variance.subpixvar16x16 = vp9_sub_pixel_variance16x16_armv6; - cpi->rtcd.variance.halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_armv6; - cpi->rtcd.variance.halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_armv6; - cpi->rtcd.variance.halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_armv6; - - cpi->rtcd.variance.mse16x16 = vp9_mse16x16_armv6; - /*cpi->rtcd.variance.getmbss = vp9_get_mb_ss_c;*/ - - cpi->rtcd.fdct.short4x4 = 
vp9_short_fdct4x4_armv6; - cpi->rtcd.fdct.short8x4 = vp9_short_fdct8x4_armv6; - cpi->rtcd.fdct.fast4x4 = vp9_short_fdct4x4_armv6; - cpi->rtcd.fdct.fast8x4 = vp9_short_fdct8x4_armv6; - cpi->rtcd.fdct.walsh_short4x4 = vp9_short_walsh4x4_armv6; - - /*cpi->rtcd.encodemb.berr = vp9_block_error_c; - cpi->rtcd.encodemb.mberr = vp9_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp9_mbuverror_c;*/ - cpi->rtcd.encodemb.subb = vp9_subtract_b_armv6; - cpi->rtcd.encodemb.submby = vp9_subtract_mby_armv6; - cpi->rtcd.encodemb.submbuv = vp9_subtract_mbuv_armv6; - - /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;*/ - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_armv6; - } -#endif - -#if HAVE_ARMV7 - if (flags & HAS_NEON) { - cpi->rtcd.variance.sad16x16 = vp9_sad16x16_neon; - cpi->rtcd.variance.sad16x8 = vp9_sad16x8_neon; - cpi->rtcd.variance.sad8x16 = vp9_sad8x16_neon; - cpi->rtcd.variance.sad8x8 = vp9_sad8x8_neon; - cpi->rtcd.variance.sad4x4 = vp9_sad4x4_neon; - - /*cpi->rtcd.variance.var4x4 = vp9_variance4x4_c;*/ - cpi->rtcd.variance.var8x8 = vp9_variance8x8_neon; - cpi->rtcd.variance.var8x16 = vp9_variance8x16_neon; - cpi->rtcd.variance.var16x8 = vp9_variance16x8_neon; - cpi->rtcd.variance.var16x16 = vp9_variance16x16_neon; - - /*cpi->rtcd.variance.subpixvar4x4 = vp9_sub_pixel_variance4x4_c;*/ - cpi->rtcd.variance.subpixvar8x8 = vp9_sub_pixel_variance8x8_neon; - /*cpi->rtcd.variance.subpixvar8x16 = vp9_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp9_sub_pixel_variance16x8_c;*/ - cpi->rtcd.variance.subpixvar16x16 = vp9_sub_pixel_variance16x16_neon; - cpi->rtcd.variance.halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_neon; - cpi->rtcd.variance.halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_neon; - cpi->rtcd.variance.halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_neon; - - cpi->rtcd.variance.mse16x16 = vp9_mse16x16_neon; - /*cpi->rtcd.variance.getmbss = vp9_get_mb_ss_c;*/ - - cpi->rtcd.fdct.short4x4 = vp9_short_fdct4x4_neon; - cpi->rtcd.fdct.short8x4 = vp9_short_fdct8x4_neon; - cpi->rtcd.fdct.fast4x4 = vp9_short_fdct4x4_neon; - cpi->rtcd.fdct.fast8x4 = vp9_short_fdct8x4_neon; - cpi->rtcd.fdct.walsh_short4x4 = vp9_short_walsh4x4_neon; - - /*cpi->rtcd.encodemb.berr = vp9_block_error_c; - cpi->rtcd.encodemb.mberr = vp9_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp9_mbuverror_c;*/ - cpi->rtcd.encodemb.subb = vp9_subtract_b_neon; - cpi->rtcd.encodemb.submby = vp9_subtract_mby_neon; - cpi->rtcd.encodemb.submbuv = vp9_subtract_mbuv_neon; - - /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.quantb_pair = vp8_regular_quantize_b_pair;*/ - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon; - cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_neon; - } -#endif - -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (flags & HAS_NEON) -#endif - { - vp9_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; - } -#endif -#endif -} diff --git a/vp9/encoder/arm/vp9_boolhuff_arm.c b/vp9/encoder/arm/vp9_boolhuff_arm.c deleted file mode 100644 index 9ff8e5f56..000000000 --- a/vp9/encoder/arm/vp9_boolhuff_arm.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
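
An aside on the variance kernels deleted above, since the scalar identities are easy to lose in the vector code. Each kernel accumulates sum (vpadal.s16) and sse (vmlal.s16) in a single pass, then finishes with variance = sse - sum*sum/256, where the divide by 256 = 16*16 is the vshr.s32 #8 near each ENDP. The bilinear taps in _BilinearTaps_coeff_ sum to 128, so each filter step is a fixed-point weighted average that vqrshrn #7 rounds and narrows. A sketch of both, with illustrative names:

#include <stdint.h>

/* What each 16x16 variance routine above computes. */
static unsigned int variance16x16_ref(const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      const unsigned char *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  unsigned int sse_acc = 0;
  int i, j, sum = 0;

  for (i = 0; i < 16; i++) {
    for (j = 0; j < 16; j++) {
      const int diff = src_ptr[j] - dst_ptr[j];
      sum += diff;              /* vpadal.s16 into q8 */
      sse_acc += diff * diff;   /* vmlal.s16 into q9/q10 */
    }
    src_ptr += src_pixels_per_line;
    dst_ptr += dst_pixels_per_line;
  }
  *sse = sse_acc;
  return sse_acc - (unsigned int)(((int64_t)sum * sum) >> 8);  /* /256 */
}

/* One bilinear tap pair from _BilinearTaps_coeff_; f[0] + f[1] == 128. */
static unsigned char bilinear_step(unsigned char a, unsigned char b,
                                   const int *f) {
  return (unsigned char)((a * f[0] + b * f[1] + 64) >> 7);
}
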
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp9/encoder/vp9_boolhuff.h" -#include "vp9/common/vp9_blockd.h" - -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046, - 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778, - 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516, - 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433, - 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307, - 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257, - 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174, - 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139, - 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77, - 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50, - 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1 -}; - diff --git a/vp9/encoder/arm/vp9_dct_arm.c b/vp9/encoder/arm/vp9_dct_arm.c deleted file mode 100644 index 5e20a4723..000000000 --- a/vp9/encoder/arm/vp9_dct_arm.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "./vp9_rtcd.h" - -#if HAVE_ARMV6 - -void vp9_short_fdct8x4_armv6(short *input, short *output, int pitch) { - vp9_short_fdct4x4_armv6(input, output, pitch); - vp9_short_fdct4x4_armv6(input + 4, output + 16, pitch); -} - -#endif /* HAVE_ARMV6 */ diff --git a/vp9/encoder/arm/vp9_dct_arm.h b/vp9/encoder/arm/vp9_dct_arm.h deleted file mode 100644 index 8eed31e60..000000000 --- a/vp9/encoder/arm/vp9_dct_arm.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
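
The vp9_prob_cost table above stores the cost of coding a symbol at probability p/256, in 1/256th-of-a-bit units. The generation rule is not part of this patch, but spot checks (cost[1] = 2047, cost[2] = 1791, cost[128] = 255) are consistent with floor(256 * log2(256 / p)) - 1, clamped to [1, 2047]. A generator sketch under that assumption:

#include <math.h>

/* Assumed reconstruction of the vp9_prob_cost rule; the formula is an
 * inference from the table values, not taken from the source. */
static void build_prob_cost(unsigned int cost[256]) {
  int p;
  cost[0] = 2047;                      /* p == 0 never occurs */
  for (p = 1; p < 256; p++) {
    long c = (long)(256.0 * log2(256.0 / p)) - 1;  /* floor, then -1 */
    if (c < 1) c = 1;
    if (c > 2047) c = 2047;
    cost[p] = (unsigned int)c;
  }
}
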
- */ - - -#ifndef VP9_ENCODER_ARM_VP9_DCT_ARM_H_ -#define VP9_ENCODER_ARM_VP9_DCT_ARM_H_ - -#if HAVE_ARMV6 -extern prototype_fdct(vp9_short_walsh4x4_armv6); -extern prototype_fdct(vp9_short_fdct4x4_armv6); -extern prototype_fdct(vp9_short_fdct8x4_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_fdct_walsh_short4x4 -#define vp8_fdct_walsh_short4x4 vp9_short_walsh4x4_armv6 - -#undef vp8_fdct_short4x4 -#define vp8_fdct_short4x4 vp9_short_fdct4x4_armv6 - -#undef vp8_fdct_short8x4 -#define vp8_fdct_short8x4 vp9_short_fdct8x4_armv6 - -#undef vp8_fdct_fast4x4 -#define vp8_fdct_fast4x4 vp9_short_fdct4x4_armv6 - -#undef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp9_short_fdct8x4_armv6 -#endif - -#endif /* HAVE_ARMV6 */ - -#if HAVE_ARMV7 -extern prototype_fdct(vp9_short_fdct4x4_neon); -extern prototype_fdct(vp9_short_fdct8x4_neon); -extern prototype_fdct(vp8_fast_fdct4x4_neon); -extern prototype_fdct(vp8_fast_fdct8x4_neon); -extern prototype_fdct(vp9_short_walsh4x4_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_fdct_short4x4 -#define vp8_fdct_short4x4 vp9_short_fdct4x4_neon - -#undef vp8_fdct_short8x4 -#define vp8_fdct_short8x4 vp9_short_fdct8x4_neon - -#undef vp8_fdct_fast4x4 -#define vp8_fdct_fast4x4 vp9_short_fdct4x4_neon - -#undef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp9_short_fdct8x4_neon - -#undef vp8_fdct_walsh_short4x4 -#define vp8_fdct_walsh_short4x4 vp9_short_walsh4x4_neon -#endif - -#endif - -#endif diff --git a/vp9/encoder/arm/vp9_encodemb_arm.h b/vp9/encoder/arm/vp9_encodemb_arm.h deleted file mode 100644 index 2f21d2cba..000000000 --- a/vp9/encoder/arm/vp9_encodemb_arm.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
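
The #undef/#define blocks in the header above implement compile-time binding: when runtime CPU detection is configured out, each generic name is simply re-pointed at the best platform implementation, so call sites pay no function-pointer indirection. A minimal sketch of the effect (the call-site wrapper is hypothetical):

extern void vp9_short_fdct4x4_neon(short *input, short *output, int pitch);

#if !CONFIG_RUNTIME_CPU_DETECT
#undef  vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp9_short_fdct4x4_neon
#endif

static void fdct_call_site(short *in, short *out, int pitch) {
  vp8_fdct_short4x4(in, out, pitch);  /* a direct call to the NEON symbol */
}
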
- */ - - -#ifndef VP9_ENCODER_ARM_VP9_ENCODEMB_ARM_H_ -#define VP9_ENCODER_ARM_VP9_ENCODEMB_ARM_H_ - -#if HAVE_ARMV6 -extern prototype_subb(vp9_subtract_b_armv6); -extern prototype_submby(vp9_subtract_mby_armv6); -extern prototype_submbuv(vp9_subtract_mbuv_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_encodemb_subb -#define vp8_encodemb_subb vp9_subtract_b_armv6 - -#undef vp8_encodemb_submby -#define vp8_encodemb_submby vp9_subtract_mby_armv6 - -#undef vp8_encodemb_submbuv -#define vp8_encodemb_submbuv vp9_subtract_mbuv_armv6 -#endif - -#endif /* HAVE_ARMV6 */ - -#if HAVE_ARMV7 -// extern prototype_berr(vp9_block_error_c); -// extern prototype_mberr(vp9_mbblock_error_c); -// extern prototype_mbuverr(vp9_mbuverror_c); - -extern prototype_subb(vp9_subtract_b_neon); -extern prototype_submby(vp9_subtract_mby_neon); -extern prototype_submbuv(vp9_subtract_mbuv_neon); - -// #undef vp8_encodemb_berr -// #define vp8_encodemb_berr vp9_block_error_c - -// #undef vp8_encodemb_mberr -// #define vp8_encodemb_mberr vp9_mbblock_error_c - -// #undef vp8_encodemb_mbuverr -// #define vp8_encodemb_mbuverr vp9_mbuverror_c - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_encodemb_subb -#define vp8_encodemb_subb vp9_subtract_b_neon - -#undef vp8_encodemb_submby -#define vp8_encodemb_submby vp9_subtract_mby_neon - -#undef vp8_encodemb_submbuv -#define vp8_encodemb_submbuv vp9_subtract_mbuv_neon -#endif - -#endif - -#endif diff --git a/vp9/encoder/arm/vp9_quantize_arm.c b/vp9/encoder/arm/vp9_quantize_arm.c deleted file mode 100644 index aacaa529c..000000000 --- a/vp9/encoder/arm/vp9_quantize_arm.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include -#include "vpx_mem/vpx_mem.h" - -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_entropy.h" - - -#if HAVE_ARMV7 - -/* vp8_quantize_mbX functions here differs from corresponding ones in - * vp9_quantize.c only by using quantize_b_pair function pointer instead of - * the regular quantize_b function pointer */ -void vp8_quantize_mby_neon(MACROBLOCK *x) { - int i; - int has_2nd_order = get_2nd_order_usage(xd); - - for (i = 0; i < 16; i += 2) - x->quantize_b_pair(&x->block[i], &x->block[i + 1], - &x->e_mbd.block[i], &x->e_mbd.block[i + 1]); - - if (has_2nd_order) - x->quantize_b(&x->block[24], &x->e_mbd.block[24]); -} - -void vp8_quantize_mb_neon(MACROBLOCK *x) { - int i; - int has_2nd_order = get_2nd_order_usage(xd); - - for (i = 0; i < 24; i += 2) - x->quantize_b_pair(&x->block[i], &x->block[i + 1], - &x->e_mbd.block[i], &x->e_mbd.block[i + 1]); - - if (has_2nd_order) - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); -} - - -void vp8_quantize_mbuv_neon(MACROBLOCK *x) { - int i; - - for (i = 16; i < 24; i += 2) - x->quantize_b_pair(&x->block[i], &x->block[i + 1], - &x->e_mbd.block[i], &x->e_mbd.block[i + 1]); -} - -#endif /* HAVE_ARMV7 */ diff --git a/vp9/encoder/arm/vp9_quantize_arm.h b/vp9/encoder/arm/vp9_quantize_arm.h deleted file mode 100644 index 41a83d7f9..000000000 --- a/vp9/encoder/arm/vp9_quantize_arm.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
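
One detail of the deleted vp9_quantize_arm.c above: it calls get_2nd_order_usage(xd) without declaring xd anywhere in the file, which suggests the code had already drifted out of step with the encoder around it. A corrected sketch of the pairing pattern (the xd declaration is an assumption inferred from the file's other uses of x->e_mbd):

void quantize_mby_sketch(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;  /* assumed; absent in the original */
  const int has_2nd_order = get_2nd_order_usage(xd);
  int i;

  /* Two blocks per call keeps the 128-bit lanes of the paired NEON
   * quantizer full. */
  for (i = 0; i < 16; i += 2)
    x->quantize_b_pair(&x->block[i], &x->block[i + 1],
                       &xd->block[i], &xd->block[i + 1]);

  if (has_2nd_order)  /* block 24 (second order) has no pair partner */
    x->quantize_b(&x->block[24], &xd->block[24]);
}
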
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_ARM_VP9_QUANTIZE_ARM_H_ -#define VP9_ENCODER_ARM_VP9_QUANTIZE_ARM_H_ - -#if HAVE_ARMV6 - -extern prototype_quantize_block(vp8_fast_quantize_b_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_quantize_fastquantb -#define vp8_quantize_fastquantb vp8_fast_quantize_b_armv6 -#endif - -#endif /* HAVE_ARMV6 */ - - -#if HAVE_ARMV7 - -extern prototype_quantize_block(vp8_fast_quantize_b_neon); -extern prototype_quantize_block_pair(vp8_fast_quantize_b_pair_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_quantize_fastquantb -#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon - -#undef vp8_quantize_fastquantb_pair -#define vp8_quantize_fastquantb_pair vp8_fast_quantize_b_pair_neon - -#undef vp8_quantize_mb -#define vp8_quantize_mb vp8_quantize_mb_neon - -#undef vp8_quantize_mbuv -#define vp8_quantize_mbuv vp8_quantize_mbuv_neon - -#undef vp8_quantize_mby -#define vp8_quantize_mby vp8_quantize_mby_neon -#endif - -#endif /* HAVE_ARMV7 */ - -#endif - diff --git a/vp9/encoder/arm/vp9_variance_arm.c b/vp9/encoder/arm/vp9_variance_arm.c deleted file mode 100644 index 91c0236e3..000000000 --- a/vp9/encoder/arm/vp9_variance_arm.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
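
Before the sub-pixel variance wrappers that follow, one identity worth recording: at the exact half pel the bilinear taps are (64, 64), so the generic filter step (a*64 + b*64 + 64) >> 7 collapses to a rounded average, which is a single vrhadd.u8 in the NEON kernels deleted earlier. That is why dedicated halfpixvar kernels exist at all, and why the C dispatch below tests xoffset and yoffset against HALFNDX before falling back to the general two-pass filter.

/* The half-pel special case of bilinear_step: taps (64, 64). */
static unsigned char half_pel_avg(unsigned char a, unsigned char b) {
  return (unsigned char)((a + b + 1) >> 1);
}
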
- */ - -#include "vpx_config.h" -#include "vp9/encoder/vp9_variance.h" -#include "vp9/common/vp9_filter.h" -#include "vp9/common/arm/vp9_bilinearfilter_arm.h" - -#define HALFNDX 8 - -#if HAVE_ARMV6 - -unsigned int vp9_sub_pixel_variance8x8_armv6 -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - unsigned short first_pass[10 * 8]; - unsigned char second_pass[8 * 8]; - const short *HFilter, *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, - src_pixels_per_line, - 9, 8, HFilter); - vp9_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, - 8, 8, 8, VFilter); - - return vp9_variance8x8_armv6(second_pass, 8, dst_ptr, - dst_pixels_per_line, sse); -} - -unsigned int vp9_sub_pixel_variance16x16_armv6 -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - unsigned short first_pass[36 * 16]; - unsigned char second_pass[20 * 16]; - const short *HFilter, *VFilter; - unsigned int var; - - if (xoffset == HALFNDX && yoffset == 0) { - var = vp9_variance_halfpixvar16x16_h_armv6(src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, sse); - } else if (xoffset == 0 && yoffset == HALFNDX) { - var = vp9_variance_halfpixvar16x16_v_armv6(src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, sse); - } else if (xoffset == HALFNDX && yoffset == HALFNDX) { - var = vp9_variance_halfpixvar16x16_hv_armv6(src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, sse); - } else { - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - vp9_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, - src_pixels_per_line, - 17, 16, HFilter); - vp9_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, - 16, 16, 16, VFilter); - - var = vp9_variance16x16_armv6(second_pass, 16, dst_ptr, - dst_pixels_per_line, sse); - } - return var; -} - -#endif /* HAVE_ARMV6 */ - - -#if HAVE_ARMV7 - -unsigned int vp9_sub_pixel_variance16x16_neon -( - const unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -) { - if (xoffset == HALFNDX && yoffset == 0) - return vp9_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); - else if (xoffset == 0 && yoffset == HALFNDX) - return vp9_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); - else if (xoffset == HALFNDX && yoffset == HALFNDX) - return vp9_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); - else - return vp9_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); -} - -#endif diff --git a/vp9/encoder/arm/vp9_variance_arm.h b/vp9/encoder/arm/vp9_variance_arm.h deleted file mode 100644 index 144feea3d..000000000 --- a/vp9/encoder/arm/vp9_variance_arm.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_ARM_VP9_VARIANCE_ARM_H_ -#define VP9_ENCODER_ARM_VP9_VARIANCE_ARM_H_ - -#if HAVE_ARMV6 - -extern prototype_sad(vp9_sad16x16_armv6); -extern prototype_variance(vp9_variance16x16_armv6); -extern prototype_variance(vp9_variance8x8_armv6); -extern prototype_subpixvariance(vp9_sub_pixel_variance16x16_armv6); -extern prototype_subpixvariance(vp9_sub_pixel_variance8x8_armv6); -extern prototype_variance(vp9_variance_halfpixvar16x16_h_armv6); -extern prototype_variance(vp9_variance_halfpixvar16x16_v_armv6); -extern prototype_variance(vp9_variance_halfpixvar16x16_hv_armv6); -extern prototype_variance(vp9_mse16x16_armv6); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_variance_sad16x16 -#define vp9_variance_sad16x16 vp9_sad16x16_armv6 - -#undef vp9_variance_subpixvar16x16 -#define vp9_variance_subpixvar16x16 vp9_sub_pixel_variance16x16_armv6 - -#undef vp9_variance_subpixvar8x8 -#define vp9_variance_subpixvar8x8 vp9_sub_pixel_variance8x8_armv6 - -#undef vp9_variance_var16x16 -#define vp9_variance_var16x16 vp9_variance16x16_armv6 - -#undef vp9_variance_mse16x16 -#define vp9_variance_mse16x16 vp9_mse16x16_armv6 - -#undef vp9_variance_var8x8 -#define vp9_variance_var8x8 vp9_variance8x8_armv6 - -#undef vp9_variance_halfpixvar16x16_h -#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_armv6 - -#undef vp9_variance_halfpixvar16x16_v -#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_armv6 - -#undef vp9_variance_halfpixvar16x16_hv -#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_armv6 - -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_ARMV6 */ - - -#if HAVE_ARMV7 -extern prototype_sad(vp9_sad4x4_neon); -extern prototype_sad(vp9_sad8x8_neon); -extern prototype_sad(vp9_sad8x16_neon); -extern prototype_sad(vp9_sad16x8_neon); -extern prototype_sad(vp9_sad16x16_neon); - -extern prototype_variance(vp9_variance8x8_neon); -extern prototype_variance(vp9_variance8x16_neon); -extern prototype_variance(vp9_variance16x8_neon); -extern prototype_variance(vp9_variance16x16_neon); - -extern prototype_subpixvariance(vp9_sub_pixel_variance8x8_neon); -extern prototype_subpixvariance(vp9_sub_pixel_variance16x16_neon); -extern prototype_subpixvariance(vp9_sub_pixel_variance16x16_neon_func); -extern prototype_variance(vp9_variance_halfpixvar16x16_h_neon); -extern prototype_variance(vp9_variance_halfpixvar16x16_v_neon); -extern prototype_variance(vp9_variance_halfpixvar16x16_hv_neon); - -extern prototype_variance(vp9_mse16x16_neon); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_variance_sad4x4 -#define vp9_variance_sad4x4 vp9_sad4x4_neon - -#undef vp9_variance_sad8x8 -#define vp9_variance_sad8x8 vp9_sad8x8_neon - -#undef vp9_variance_sad8x16 -#define vp9_variance_sad8x16 vp9_sad8x16_neon - -#undef vp9_variance_sad16x8 -#define vp9_variance_sad16x8 vp9_sad16x8_neon - -#undef vp9_variance_sad16x16 -#define vp9_variance_sad16x16 vp9_sad16x16_neon - -#undef vp9_variance_var8x8 -#define vp9_variance_var8x8 vp9_variance8x8_neon - -#undef vp9_variance_var8x16 -#define vp9_variance_var8x16 vp9_variance8x16_neon - -#undef vp9_variance_var16x8 -#define vp9_variance_var16x8 vp9_variance16x8_neon - -#undef vp9_variance_var16x16 -#define vp9_variance_var16x16 vp9_variance16x16_neon - -#undef vp9_variance_subpixvar8x8 -#define vp9_variance_subpixvar8x8 
vp9_sub_pixel_variance8x8_neon - -#undef vp9_variance_subpixvar16x16 -#define vp9_variance_subpixvar16x16 vp9_sub_pixel_variance16x16_neon - -#undef vp9_variance_halfpixvar16x16_h -#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_neon - -#undef vp9_variance_halfpixvar16x16_v -#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_neon - -#undef vp9_variance_halfpixvar16x16_hv -#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_neon - -#undef vp9_variance_mse16x16 -#define vp9_variance_mse16x16 vp9_mse16x16_neon - -#endif - -#endif - -#endif diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c index 3fe9c8fb7..30431ff8c 100644 --- a/vp9/encoder/vp9_asm_enc_offsets.c +++ b/vp9/encoder/vp9_asm_enc_offsets.c @@ -79,12 +79,4 @@ END /* add asserts for any offset that is not supported by assembly code * add asserts for any size that is not supported by assembly code - - * These are used in vp8cx_pack_tokens. They are hard coded so if their sizes - * change they will have to be adjusted. */ - -#if HAVE_ARMV5TE -ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8) -ct_assert(vp9_extra_bit_struct_sz, sizeof(vp9_extra_bit_struct) == 16) -#endif diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index f94e00c1e..4270a1d35 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -40,24 +40,12 @@ #include "vp9/common/vp9_mvref_common.h" #include "vp9/encoder/vp9_temporal_filter.h" -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif - #include #include #include extern void print_tree_update_probs(); -#if HAVE_ARMV7 -extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc); - -extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc); -#endif - static void set_default_lf_deltas(VP9_COMP *cpi); #define DEFAULT_INTERP_FILTER EIGHTTAP /* SWITCHABLE for better performance */ @@ -4055,33 +4043,15 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size, } } -// For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. -#if HAVE_ARMV7 -extern void vp9_push_neon(int64_t *store); -extern void vp9_pop_neon(int64_t *store); -#endif - int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { -#if HAVE_ARMV7 - int64_t store_reg[8]; -#endif VP9_COMP *cpi = (VP9_COMP *) ptr; VP9_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; int res = 0; -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_push_neon(store_reg); - } -#endif - vpx_usec_timer_start(&timer); if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags, cpi->active_map_enabled ? 
cpi->active_map : NULL)) @@ -4090,15 +4060,6 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(store_reg); - } -#endif - return res; } @@ -4119,9 +4080,6 @@ static int frame_is_reference(const VP9_COMP *cpi) { int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, int64_t *time_stamp, int64_t *time_end, int flush) { -#if HAVE_ARMV7 - int64_t store_reg[8]; -#endif VP9_COMP *cpi = (VP9_COMP *) ptr; VP9_COMMON *cm = &cpi->common; struct vpx_usec_timer cmptimer; @@ -4130,15 +4088,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (!cpi) return -1; -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_push_neon(store_reg); - } -#endif - vpx_usec_timer_start(&cmptimer); cpi->source = NULL; @@ -4191,14 +4140,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->twopass.first_pass_done = 1; } -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(store_reg); - } -#endif return -1; } @@ -4425,15 +4366,6 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #endif -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp9_pop_neon(store_reg); - } -#endif - return 0; } diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 824951afa..f10fb3a1d 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -17,13 +17,6 @@ #include "vpx_scale/vpxscale.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif - -#if HAVE_ARMV7 -extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); -#endif void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { @@ -254,22 +247,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { int Bias = 0; // Bias against raising loop filter and in favour of lowering it // Make a copy of the unfiltered / processed recon buffer -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf); - } -#endif + vp8_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf); if (cm->frame_type == KEY_FRAME) cm->sharpness_level = 0; @@ -295,22 +273,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { filt_best = filt_mid; // Re-instate the unfiltered frame -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - } -#endif + vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); while (filter_step > 0) { Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; // PGW change 12/12/06 for 
small images @@ -334,22 +297,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - } -#endif + vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); // If value is close to the best so far then bias towards a lower loop filter value. if ((filt_err - Bias) < best_err) { @@ -369,22 +317,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); - } -#if CONFIG_RUNTIME_CPU_DETECT - else -#endif -#endif -#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - { - vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - } -#endif + vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); // Was it better than the previous best? if (filt_err < (best_err - Bias)) { @@ -405,4 +338,3 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { cm->filter_level = filt_best; } - diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index d801ca74b..dd11e75ba 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -26,10 +26,6 @@ #include "x86/vp9_quantize_x86.h" #endif -#if ARCH_ARM -#include "arm/vp9_quantize_arm.h" -#endif - #define prototype_quantize_block_type(sym) \ void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type) extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4); diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 83ce9edf9..5d2fe6ff9 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -128,60 +128,6 @@ vp9/common/x86/vp9_loopfilter_x86.c.d: CFLAGS += -msse2 vp9/common/x86/vp9_sadmxn_x86.c.d: CFLAGS += -msse2 endif -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_arm_systemdependent.c -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_bilinearfilter_arm.c -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_bilinearfilter_arm.h -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_filter_arm.c -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_idct_arm.h -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_loopfilter_arm.c -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_loopfilter_arm.h -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_recon_arm.h -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_reconintra_arm.c -VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/vp9_subpixel_arm.h - -# common (armv6) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x8_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem16x16_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dc_only_idct_add_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/iwalsh_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/filter_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/loopfilter_v6$(ASM) 
-VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/recon_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/simpleloopfilter_v6$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/sixtappredict8x4_v6$(ASM) - -# common (neon) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict4x4_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict8x4_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict8x8_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict16x16_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x4_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x8_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem16x16_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dc_only_idct_add_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/iwalsh_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilter_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilter_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon2b_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon4b_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/reconb_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/shortidct4x4llm_1_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/shortidct4x4llm_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict4x4_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x4_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x8_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon16x16mb_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM) -VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/vp9_recon_neon.c - - $(eval $(call asm_offsets_template,\ vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/vp9_asm_com_offsets.c)) diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index d20e79a6c..4ab0a9696 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -15,10 +15,6 @@ VP9_CX_SRCS-no += $(VP9_COMMON_SRCS-no) VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) -ifeq ($(ARCH_ARM),yes) - include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx_arm.mk -endif - VP9_CX_SRCS-yes += vp9_cx_iface.c # encoder diff --git a/vp9/vp9cx_arm.mk b/vp9/vp9cx_arm.mk deleted file mode 100644 index d0108a84e..000000000 --- a/vp9/vp9cx_arm.mk +++ /dev/null @@ -1,63 +0,0 @@ -## -## Copyright (c) 2010 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. All contributing project authors may -## be found in the AUTHORS file in the root of the source tree. -## - - -#VP9_CX_SRCS list is modified according to different platforms. 
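
A note on the build-system idiom running through these makefile hunks: configure emits every feature test as a literal yes or no, so a list name such as VP9_CX_SRCS-$(HAVE_ARMV7) expands to either VP9_CX_SRCS-yes (compiled in) or VP9_CX_SRCS-no (ignored), and the corresponding *_SRCS_REMOVE-yes lists subtract entries the same way. Deleting the ARM blocks here therefore removes those files from every configuration at once, with no leftover conditionals to maintain.
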
- -#File list for arm -# encoder -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_arm_csystemdependent.c - -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_dct_arm.c -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_dct_arm.h -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_encodemb_arm.h -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_quantize_arm.c -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_quantize_arm.h -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_variance_arm.c -VP9_CX_SRCS-$(ARCH_ARM) += encoder/arm/vp9_variance_arm.h - -#File list for armv5te -# encoder -VP9_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/vp9_boolhuff_arm.c -VP9_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/vp9_boolhuff.c -VP9_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/boolhuff_armv5te$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM) - -#File list for armv6 -# encoder -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_subtract_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_short_fdct4x4_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance8x8_armv6$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM) - -#File list for neon -# encoder -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/fastquantizeb_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp9_picklpf_arm.c -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/sad8_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/sad16_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/shortfdct_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/subtract_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/variance_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_mse16x16_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM) -VP9_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 54af58d7f..004039016 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -15,36 +15,8 @@ VP9_DX_SRCS-no += $(VP9_COMMON_SRCS-no) VP9_DX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) -ifeq ($(ARCH_ARM),yes) - include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx_arm.mk -endif - VP9_DX_SRCS-yes += vp9_dx_iface.c -# common -#define ARM -#define DISABLE_THREAD - -#INCLUDES += algo/vpx_common/vpx_mem/include -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += decoder - - - -# decoder -#define ARM -#define DISABLE_THREAD - -#INCLUDES += 
algo/vpx_common/vpx_mem/include -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += common -#INCLUDES += decoder - VP9_DX_SRCS-yes += decoder/vp9_asm_dec_offsets.c VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.c VP9_DX_SRCS-yes += decoder/vp9_decodemv.c diff --git a/vp9/vp9dx_arm.mk b/vp9/vp9dx_arm.mk deleted file mode 100644 index 32ec26afa..000000000 --- a/vp9/vp9dx_arm.mk +++ /dev/null @@ -1,29 +0,0 @@ -## -## Copyright (c) 2010 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. All contributing project authors may -## be found in the AUTHORS file in the root of the source tree. -## - - -#VP8_DX_SRCS list is modified according to different platforms. - -VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/vp9_dequantize_arm.c - -#File list for armv6 -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_idct_v6$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantize_v6$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/vp9_idct_blk_v6.c - -#File list for neon -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_dc_full_2x_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_dc_0_2x_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequant_idct_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantizeb_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/vp9_idct_blk_neon.c From c6bd29e2f55b14a0b0dacb5af407f485ee7ffb3f Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 3 Dec 2012 12:26:51 -0800 Subject: [PATCH 03/77] Begin to refactor vpx_scale usage in VP9 Only declare the functions in vpx_scale RTCD and include the relevant header. Remove unused files and functions in vpx_scale to avoid wasting time renaming. vpx_scale/win32/scaleopt.c contains functions which have not been called in a long time but are potentially optimized. The 'vp8' functions have not been renamed yet. That is for after the cleanup. 
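For illustration only (a minimal sketch, not code in this patch), the
calling pattern this moves toward looks roughly like the following.
The vp8_yv12_* names are the existing RTCD entry points declared in
vpx_scale_rtcd.sh; the caller function is hypothetical and assumes the
generated vpx_scale_rtcd() setup has already run:

    /* Sketch: include the generated vpx_scale RTCD header directly
     * instead of the codec-wide vpx_rtcd.h. The vp8_yv12_* symbols
     * resolve at runtime to the C or platform-specific (e.g. NEON)
     * implementations. */
    #include "./vpx_scale_rtcd.h"
    #include "vpx_scale/yv12config.h"

    static void sync_reference(YV12_BUFFER_CONFIG *src,
                               YV12_BUFFER_CONFIG *dst) {
      vp8_yv12_copy_frame(src, dst);       /* runtime-dispatched copy */
      vp8_yv12_extend_frame_borders(dst);  /* pad borders afterwards  */
    }

Keeping the scaler prototypes in vpx_scale's own RTCD file means each
codec pulls in only the dispatch table it actually uses.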
Change-Id: I2c325a101d60fa9d27e7dfcd5b52a864b4a1e09c --- vp9/common/vp9_postproc.c | 1 + vp9/common/vp9_rtcd_defs.sh | 36 - vp9/decoder/vp9_onyxd_if.c | 1 + vp9/encoder/vp9_firstpass.c | 1 + vp9/encoder/vp9_onyx_if.c | 1 + vp9/encoder/vp9_picklpf.c | 1 + vpx_scale/generic/bicubic_scaler.c | 569 -------- vpx_scale/generic/gen_scalers.c | 682 ---------- vpx_scale/generic/vpxscale.c | 480 ------- .../include/generic/vpxscale_arbitrary.h | 55 - .../include/generic/vpxscale_depricated.h | 34 - vpx_scale/scale_mode.h | 28 - vpx_scale/vpx_scale.mk | 1 - vpx_scale/vpx_scale_rtcd.sh | 15 - vpx_scale/vpxscale.h | 10 - vpx_scale/win32/scaleopt.c | 1193 ----------------- vpx_scale/win32/scalesystemdependent.c | 87 -- 17 files changed, 5 insertions(+), 3190 deletions(-) delete mode 100644 vpx_scale/generic/bicubic_scaler.c delete mode 100644 vpx_scale/include/generic/vpxscale_arbitrary.h delete mode 100644 vpx_scale/include/generic/vpxscale_depricated.h delete mode 100644 vpx_scale/scale_mode.h delete mode 100644 vpx_scale/win32/scalesystemdependent.c diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index 2cf3b6f77..f00edf00d 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -16,6 +16,7 @@ #include "vpx_scale/vpxscale.h" #include "vp9/common/vp9_systemdependent.h" #include "./vp9_rtcd.h" +#include "./vpx_scale_rtcd.h" #include diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index ddc64886d..5b7af100b 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -677,39 +677,3 @@ specialize vp9_yv12_copy_partial_frame fi # end encoder functions - -# Scaler functions -if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then - prototype void vp8_horizontal_line_4_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_2_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_3_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_3_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_1_2_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void 
vp8_last_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" -fi - -prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf" -specialize vp8_yv12_extend_frame_borders - -prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_frame - -prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_y - diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index e01910d53..bad43cabd 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -28,6 +28,7 @@ #include "vpx_ports/vpx_timer.h" #include "vp9/decoder/vp9_decodframe.h" #include "vp9/decoder/vp9_detokenize.h" +#include "./vpx_scale_rtcd.h" static int get_free_fb(VP9_COMMON *cm); static void ref_cnt_fb(int *buf, int *idx, int new_idx); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 6179f2cf9..71a8cefe6 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -31,6 +31,7 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/encoder/vp9_encodemv.h" +#include "./vpx_scale_rtcd.h" #define OUTPUT_FPF 0 diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 4270a1d35..8cabed9c9 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -24,6 +24,7 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/encoder/vp9_segmentation.h" #include "./vp9_rtcd.h" +#include "./vpx_scale_rtcd.h" #if CONFIG_POSTPROC #include "vp9/common/vp9_postproc.h" #endif diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index f10fb3a1d..90ea6f14a 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -17,6 +17,7 @@ #include "vpx_scale/vpxscale.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" +#include "./vpx_scale_rtcd.h" void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c deleted file mode 100644 index c116740da..000000000 --- a/vpx_scale/generic/bicubic_scaler.c +++ /dev/null @@ -1,569 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include -#include -#include -#include "vpx_mem/vpx_mem.h" -#include "vpxscale_arbitrary.h" - -#define FIXED_POINT - -#define MAX_IN_WIDTH 800 -#define MAX_IN_HEIGHT 600 -#define MAX_OUT_WIDTH 800 -#define MAX_OUT_HEIGHT 600 -#define MAX_OUT_DIMENSION ((MAX_OUT_WIDTH > MAX_OUT_HEIGHT) ? \ - MAX_OUT_WIDTH : MAX_OUT_HEIGHT) - -BICUBIC_SCALER_STRUCT g_b_scaler; -static int g_first_time = 1; - -#pragma DATA_SECTION(g_hbuf, "VP6_HEAP") -#pragma DATA_ALIGN (g_hbuf, 32); -unsigned char g_hbuf[MAX_OUT_DIMENSION]; - -#pragma DATA_SECTION(g_hbuf_uv, "VP6_HEAP") -#pragma DATA_ALIGN (g_hbuf_uv, 32); -unsigned char g_hbuf_uv[MAX_OUT_DIMENSION]; - - -#ifdef FIXED_POINT -static int a_i = 0.6 * 65536; -#else -static float a = -0.6; -#endif - -#ifdef FIXED_POINT -// 3 2 -// C0 = a*t - a*t -// -static short c0_fixed(unsigned int t) { - // put t in Q16 notation - unsigned short v1, v2; - - // Q16 - v1 = (a_i * t) >> 16; - v1 = (v1 * t) >> 16; - - // Q16 - v2 = (a_i * t) >> 16; - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q12 - return -((v1 - v2) >> 4); -} - -// 2 3 -// C1 = a*t + (3-2*a)*t - (2-a)*t -// -static short c1_fixed(unsigned int t) { - unsigned short v1, v2, v3; - unsigned short two, three; - - // Q16 - v1 = (a_i * t) >> 16; - - // Q13 - two = 2 << 13; - v2 = two - (a_i >> 3); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q13 - three = 3 << 13; - v3 = three - (2 * (a_i >> 3)); - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return (((v1 >> 3) - v2 + v3) >> 1); - -} - -// 2 3 -// C2 = 1 - (3-a)*t + (2-a)*t -// -static short c2_fixed(unsigned int t) { - unsigned short v1, v2, v3; - unsigned short two, three; - - // Q13 - v1 = 1 << 13; - - // Q13 - three = 3 << 13; - v2 = three - (a_i >> 3); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q13 - two = 2 << 13; - v3 = two - (a_i >> 3); - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return (v1 - v2 + v3) >> 1; -} - -// 2 3 -// C3 = a*t - 2*a*t + a*t -// -static short c3_fixed(unsigned int t) { - int v1, v2, v3; - - // Q16 - v1 = (a_i * t) >> 16; - - // Q15 - v2 = 2 * (a_i >> 1); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q16 - v3 = (a_i * t) >> 16; - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); -} -#else -// 3 2 -// C0 = -a*t + a*t -// -float C0(float t) { - return -a * t * t * t + a * t * t; -} - -// 2 3 -// C1 = -a*t + (2*a+3)*t - (a+2)*t -// -float C1(float t) { - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -} - -// 2 3 -// C2 = 1 - (a+3)*t + (a+2)*t -// -float C2(float t) { - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -} - -// 2 3 -// C3 = a*t - 2*a*t + a*t -// -float C3(float t) { - return a * t * t * t - 2.0f * a * t * t + a * t; -} -#endif - -#if 0 -int compare_real_fixed() { - int i, errors = 0; - float mult = 1.0 / 10000.0; - unsigned int fixed_mult = mult * 4294967296;// 65536; - unsigned int phase_offset_int; - float phase_offset_real; - - for (i = 0; i < 10000; i++) { - int fixed0, fixed1, fixed2, fixed3, fixed_total; - int real0, real1, real2, real3, real_total; - - phase_offset_real = (float)i * mult; - 
phase_offset_int = (fixed_mult * i) >> 16; -// phase_offset_int = phase_offset_real * 65536; - - fixed0 = c0_fixed(phase_offset_int); - real0 = C0(phase_offset_real) * 4096.0; - - if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) - errors++; - - fixed1 = c1_fixed(phase_offset_int); - real1 = C1(phase_offset_real) * 4096.0; - - if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) - errors++; - - fixed2 = c2_fixed(phase_offset_int); - real2 = C2(phase_offset_real) * 4096.0; - - if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) - errors++; - - fixed3 = c3_fixed(phase_offset_int); - real3 = C3(phase_offset_real) * 4096.0; - - if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) - errors++; - - fixed_total = fixed0 + fixed1 + fixed2 + fixed3; - real_total = real0 + real1 + real2 + real3; - - if ((fixed_total > 4097) || (fixed_total < 4094)) - errors++; - - if ((real_total > 4097) || (real_total < 4095)) - errors++; - } - - return errors; -} -#endif - -// Find greatest common denominator between two integers. Method used here is -// slow compared to Euclid's algorithm, but does not require any division. -int gcd(int a, int b) { - // Problem with this algorithm is that if a or b = 0 this function - // will never exit. Don't want to return 0 because any computation - // that was based on a common denoninator and tried to reduce by - // dividing by 0 would fail. Best solution that could be thought of - // would to be fail by returing a 1; - if (a <= 0 || b <= 0) - return 1; - - while (a != b) { - if (b > a) - b = b - a; - else { - int tmp = a;// swap large and - a = b; // small - b = tmp; - } - } - - return b; -} - -void bicubic_coefficient_init() { - vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); - g_first_time = 0; -} - -void bicubic_coefficient_destroy() { - if (!g_first_time) { - vpx_free(g_b_scaler.l_w); - - vpx_free(g_b_scaler.l_h); - - vpx_free(g_b_scaler.l_h_uv); - - vpx_free(g_b_scaler.c_w); - - vpx_free(g_b_scaler.c_h); - - vpx_free(g_b_scaler.c_h_uv); - - vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); - } -} - -// Create the coeffients that will be used for the cubic interpolation. -// Because scaling does not have to be equal in the vertical and horizontal -// regimes the phase offsets will be different. There are 4 coefficents -// for each point, two on each side. The layout is that there are the -// 4 coefficents for each phase in the array and then the next phase. -int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) { - int i; -#ifdef FIXED_POINT - int phase_offset_int; - unsigned int fixed_mult; - int product_val = 0; -#else - float phase_offset; -#endif - int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; - - if (g_first_time) - bicubic_coefficient_init(); - - - // check to see if the coefficents have already been set up correctly - if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) - && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) - return 0; - - g_b_scaler.in_width = in_width; - g_b_scaler.in_height = in_height; - g_b_scaler.out_width = out_width; - g_b_scaler.out_height = out_height; - - // Don't want to allow crazy scaling, just try and prevent a catastrophic - // failure here. Want to fail after setting the member functions so if - // if the scaler is called the member functions will not scale. 
- if (out_width <= 0 || out_height <= 0) - return -1; - - // reduce in/out width and height ratios using the gcd - gcd_w = gcd(out_width, in_width); - gcd_h = gcd(out_height, in_height); - gcd_h_uv = gcd(out_height, in_height / 2); - - // the numerator width and height are to be saved in - // globals so they can be used during the scaling process - // without having to be recalculated. - g_b_scaler.nw = out_width / gcd_w; - d_w = in_width / gcd_w; - - g_b_scaler.nh = out_height / gcd_h; - d_h = in_height / gcd_h; - - g_b_scaler.nh_uv = out_height / gcd_h_uv; - d_h_uv = (in_height / 2) / gcd_h_uv; - - // allocate memory for the coefficents - vpx_free(g_b_scaler.l_w); - - vpx_free(g_b_scaler.l_h); - - vpx_free(g_b_scaler.l_h_uv); - - g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); - g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); - g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); - - vpx_free(g_b_scaler.c_w); - - vpx_free(g_b_scaler.c_h); - - vpx_free(g_b_scaler.c_h_uv); - - g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); - g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); - g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); - - g_b_scaler.hbuf = g_hbuf; - g_b_scaler.hbuf_uv = g_hbuf_uv; - - // Set up polyphase filter taps. This needs to be done before - // the scaling because of the floating point math required. The - // coefficients are multiplied by 2^12 so that fixed point math - // can be used in the main scaling loop. -#ifdef FIXED_POINT - fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; - - product_val = 0; - - for (i = 0; i < g_b_scaler.nw; i++) { - if (product_val > g_b_scaler.nw) - product_val -= g_b_scaler.nw; - - phase_offset_int = (fixed_mult * product_val) >> 16; - - g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int); - g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int); - g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int); - g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int); - - product_val += d_w; - } - - - fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; - - product_val = 0; - - for (i = 0; i < g_b_scaler.nh; i++) { - if (product_val > g_b_scaler.nh) - product_val -= g_b_scaler.nh; - - phase_offset_int = (fixed_mult * product_val) >> 16; - - g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int); - g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int); - g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int); - g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int); - - product_val += d_h; - } - - fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; - - product_val = 0; - - for (i = 0; i < g_b_scaler.nh_uv; i++) { - if (product_val > g_b_scaler.nh_uv) - product_val -= g_b_scaler.nh_uv; - - phase_offset_int = (fixed_mult * product_val) >> 16; - - g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int); - - product_val += d_h_uv; - } - -#else - - for (i = 0; i < g_nw; i++) { - phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; - g_c_w[i * 4] = (C3(phase_offset) * 4096.0); - g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0); - g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0); - g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0); - } - - for (i = 0; i < g_nh; i++) { - phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; - g_c_h[i * 4] = (C0(phase_offset) * 4096.0); - g_c_h[i * 4 
+ 1] = (C1(phase_offset) * 4096.0); - g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0); - g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0); - } - - for (i = 0; i < g_nh_uv; i++) { - phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; - g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0); - g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0); - g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0); - g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0); - } - -#endif - - // Create an array that corresponds input lines to output lines. - // This doesn't require floating point math, but it does require - // a division and because hardware division is not present that - // is a call. - for (i = 0; i < out_width; i++) { - g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; - - if ((g_b_scaler.l_w[i] + 2) <= in_width) - g_b_scaler.max_usable_out_width = i; - - } - - for (i = 0; i < out_height + 1; i++) { - g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; - g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; - } - - return 0; -} - -int bicubic_scale(int in_width, int in_height, int in_stride, - int out_width, int out_height, int out_stride, - unsigned char *input_image, unsigned char *output_image) { - short *RESTRICT l_w, * RESTRICT l_h; - short *RESTRICT c_w, * RESTRICT c_h; - unsigned char *RESTRICT ip, * RESTRICT op; - unsigned char *RESTRICT hbuf; - int h, w, lw, lh; - int temp_sum; - int phase_offset_w, phase_offset_h; - - c_w = g_b_scaler.c_w; - c_h = g_b_scaler.c_h; - - op = output_image; - - l_w = g_b_scaler.l_w; - l_h = g_b_scaler.l_h; - - phase_offset_h = 0; - - for (h = 0; h < out_height; h++) { - // select the row to work on - lh = l_h[h]; - ip = input_image + (in_stride * lh); - - // vp8_filter the row vertically into an temporary buffer. - // If the phase offset == 0 then all the multiplication - // is going to result in the output equalling the input. - // So instead point the temporary buffer to the input. - // Also handle the boundry condition of not being able to - // filter that last lines. - if (phase_offset_h && (lh < in_height - 2)) { - hbuf = g_b_scaler.hbuf; - - for (w = 0; w < in_width; w++) { - temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride]; - temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w]; - temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride]; - temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride]; - - hbuf[w] = temp_sum >> 12; - } - } else - hbuf = ip; - - // increase the phase offset for the next time around. 
- if (++phase_offset_h >= g_b_scaler.nh) - phase_offset_h = 0; - - // now filter and expand it horizontally into the final - // output buffer - phase_offset_w = 0; - - for (w = 0; w < out_width; w++) { - // get the index to use to expand the image - lw = l_w[w]; - - temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1]; - temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw]; - temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1]; - temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2]; - temp_sum = temp_sum >> 12; - - if (++phase_offset_w >= g_b_scaler.nw) - phase_offset_w = 0; - - // boundry conditions - if ((lw + 2) >= in_width) - temp_sum = hbuf[lw]; - - if (lw == 0) - temp_sum = hbuf[0]; - - op[w] = temp_sum; - } - - op += out_stride; - } - - return 0; -} - -void bicubic_scale_frame_reset() { - g_b_scaler.out_width = 0; - g_b_scaler.out_height = 0; -} - -void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int new_width, int new_height) { - - dst->y_width = new_width; - dst->y_height = new_height; - dst->uv_width = new_width / 2; - dst->uv_height = new_height / 2; - - dst->y_stride = dst->y_width; - dst->uv_stride = dst->uv_width; - - bicubic_scale(src->y_width, src->y_height, src->y_stride, - new_width, new_height, dst->y_stride, - src->y_buffer, dst->y_buffer); - - bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->u_buffer, dst->u_buffer); - - bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->v_buffer, dst->v_buffer); -} diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c index 60c21fb3a..febe97d94 100644 --- a/vpx_scale/generic/gen_scalers.c +++ b/vpx_scale/generic/gen_scalers.c @@ -17,688 +17,6 @@ /**************************************************************************** * - * ROUTINE : vp8_horizontal_line_4_5_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 4 to 5. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -void vp8_horizontal_line_4_5_scale_c(const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width) { - unsigned i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 4; i += 4) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char) a; - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - b = src[4]; - des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); - - src += 4; - des += 5; - } - - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - des [4] = (unsigned char)(a); - -} - -/**************************************************************************** - * - * ROUTINE : vp8_vertical_band_4_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The - * height of the band scaled is 4-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -void vp8_vertical_band_4_5_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) { - a = des [0]; - b = des [dest_pitch]; - - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - - c = des[dest_pitch * 2] * 154; - d = des[dest_pitch * 3]; - - des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - - /* First line in next band */ - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_last_vertical_band_4_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 4 to 5. The - * height of the band scaled is 4-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. 
- * - ****************************************************************************/ -void vp8_last_vertical_band_4_5_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) { - a = des[0]; - b = des[dest_pitch]; - - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - - c = des[dest_pitch * 2] * 154; - d = des[dest_pitch * 3]; - - des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - - /* No other line for interplation of this line, so .. */ - des[dest_pitch * 4] = (unsigned char) d; - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_horizontal_line_2_3_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 2 to 3. - * - * SPECIAL NOTES : None. - * - * - ****************************************************************************/ -void vp8_horizontal_line_2_3_scale_c(const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 2; i += 2) { - a = src[0]; - b = src[1]; - c = src[2]; - - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); - - src += 2; - des += 3; - } - - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [2] = (unsigned char)(b); -} - - -/**************************************************************************** - * - * ROUTINE : vp8_vertical_band_2_3_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The - * height of the band scaled is 2-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -void vp8_vertical_band_2_3_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) { - a = des [0]; - b = des [dest_pitch]; - c = des[dest_pitch * 3]; - des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_last_vertical_band_2_3_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. 
- * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 2 to 3. The - * height of the band scaled is 2-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. - * - ****************************************************************************/ -void vp8_last_vertical_band_2_3_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) { - a = des [0]; - b = des [dest_pitch]; - - des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [dest_pitch * 2] = (unsigned char)(b); - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_horizontal_line_3_5_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 3 to 5. - * - * SPECIAL NOTES : None. - * - * - ****************************************************************************/ -void vp8_horizontal_line_3_5_scale_c(const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = src[2]; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - a = src[3]; - des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - src += 3; - des += 5; - } - - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - c = src[2]; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - des [4] = (unsigned char)(c); -} - -/**************************************************************************** - * - * ROUTINE : vp8_vertical_band_3_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. 
- * - ****************************************************************************/ -void vp8_vertical_band_3_5_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = des[dest_pitch * 2]; - des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - /* First line in next band... */ - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_last_vertical_band_3_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 3 to 5. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. - * - ****************************************************************************/ -void vp8_last_vertical_band_3_5_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) { - a = des [0]; - b = des [dest_pitch]; - - des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = des[dest_pitch * 2]; - des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - /* No other line for interplation of this line, so .. */ - des [ dest_pitch * 4 ] = (unsigned char)(c); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_horizontal_line_3_4_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 3 to 4. - * - * SPECIAL NOTES : None. 
- * - * - ****************************************************************************/ -void vp8_horizontal_line_3_4_scale_c(const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = src[2]; - des [2] = (unsigned char)((b + c + 1) >> 1); - - a = src[3]; - des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); - - src += 3; - des += 4; - } - - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = src[2]; - des [2] = (unsigned char)((b + c + 1) >> 1); - des [3] = (unsigned char)(c); -} - -/**************************************************************************** - * - * ROUTINE : vp8_vertical_band_3_4_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -void vp8_vertical_band_3_4_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = des[dest_pitch * 2]; - des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); - - /* First line in next band... */ - a = des [dest_pitch * 4]; - des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_last_vertical_band_3_4_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 3 to 4. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. - * - ****************************************************************************/ -void vp8_last_vertical_band_3_4_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) { - a = des [0]; - b = des [dest_pitch]; - - des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = des[dest_pitch * 2]; - des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); - - /* No other line for interplation of this line, so .. 
*/ - des [dest_pitch * 3] = (unsigned char)(c); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_horizontal_line_1_2_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 1 to 2. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void vp8_horizontal_line_1_2_scale_c(const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 1; i += 1) { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a + b + 1) >> 1); - src += 1; - des += 2; - } - - a = src[0]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)(a); -} - -/**************************************************************************** - * - * ROUTINE : vp8_vertical_band_1_2_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The - * height of the band scaled is 1-pixel. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -void vp8_vertical_band_1_2_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) { - a = des [0]; - b = des [dest_pitch * 2]; - - des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_last_vertical_band_1_2_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The - * height of the band scaled is 1-pixel. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. - * - ****************************************************************************/ -void vp8_last_vertical_band_1_2_scale_c(unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) { - unsigned int i; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) { - des[dest_pitch] = des[0]; - des++; - } -} - - - - - -/**************************************************************************** - * - * ROUTINE : vp8_horizontal_line_4_5_scale_c * * INPUTS : const unsigned char *source : Pointer to source data. * unsigned int source_width : Stride of source. 
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c index 584ad4b31..49cdb7bc7 100644 --- a/vpx_scale/generic/vpxscale.c +++ b/vpx_scale/generic/vpxscale.c @@ -23,7 +23,6 @@ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/yv12config.h" -#include "vpx_scale/scale_mode.h" typedef struct { int expanded_frame_width; @@ -39,66 +38,6 @@ typedef struct { } SCALE_VARS; -/**************************************************************************** - * - * ROUTINE : horizontal_line_copy - * - * INPUTS : None - * - * - * OUTPUTS : None. - * - * RETURNS : None - * - * FUNCTION : 1 to 1 scaling up for a horizontal line of pixles - * - * SPECIAL NOTES : None. - * - * ERRORS : None. - * - ****************************************************************************/ -static -void horizontal_line_copy( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - (void) dest_width; - - duck_memcpy(dest, source, source_width); -} -/**************************************************************************** - * - * ROUTINE : null_scale - * - * INPUTS : None - * - * - * OUTPUTS : None. - * - * RETURNS : None - * - * FUNCTION : 1 to 1 scaling up for a vertical band - * - * SPECIAL NOTES : None. - * - * ERRORS : None. - * - ****************************************************************************/ -static -void null_scale( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - (void) dest; - (void) dest_pitch; - (void) dest_width; - - return; -} - /**************************************************************************** * * ROUTINE : scale1d_2t1_i @@ -589,422 +528,3 @@ void vp8_scale_frame for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); } -/**************************************************************************** - * - * ROUTINE : any_ratio_2d_scale - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance (NOT USED). - * const unsigned char *source : Pointer to source image. - * unsigned int source_pitch : Stride of source image. - * unsigned int source_width : Width of source image. - * unsigned int source_height : Height of source image (NOT USED). - * unsigned char *dest : Pointer to destination image. - * unsigned int dest_pitch : Stride of destination image. - * unsigned int dest_width : Width of destination image. - * unsigned int dest_height : Height of destination image. - * - * OUTPUTS : None. - * - * RETURNS : int: 1 if image scaled, 0 if image could not be scaled. - * - * FUNCTION : Scale the image with changing apect ratio. - * - * SPECIAL NOTES : This scaling is a bi-linear scaling. Need to re-work the - * whole function for new scaling algorithm. 
- * - ****************************************************************************/ -static -int any_ratio_2d_scale -( - SCALE_VARS *si, - const unsigned char *source, - int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width, - unsigned int dest_height -) { - unsigned int i, k; - unsigned int src_band_height = 0; - unsigned int dest_band_height = 0; - - /* suggested scale factors */ - int hs = si->HScale; - int hr = si->HRatio; - int vs = si->VScale; - int vr = si->VRatio; - - /* assume the ratios are scalable instead of should be centered */ - int ratio_scalable = 1; - - const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); - const unsigned char *line_src; - - void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; - void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - - (void) si; - - /* find out the ratio for each direction */ - switch (hr * 30 / hs) { - case 24: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_4_5_scale; - break; - case 22: - /* 3-4 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_3_4_scale; - break; - - case 20: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_2_3_scale; - break; - case 18: - /* 3-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_3_5_scale; - break; - case 15: - /* 1-2 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_1_2_scale; - break; - case 30: - /* no scale in Width direction */ - horiz_line_scale = horizontal_line_copy; - break; - default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } - - switch (vr * 30 / vs) { - case 24: - /* 4-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_4_5_scale; - last_vert_band_scale = vp8_last_vertical_band_4_5_scale; - src_band_height = 4; - dest_band_height = 5; - break; - case 22: - /* 3-4 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_3_4_scale; - last_vert_band_scale = vp8_last_vertical_band_3_4_scale; - src_band_height = 3; - dest_band_height = 4; - break; - case 20: - /* 2-3 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_2_3_scale; - last_vert_band_scale = vp8_last_vertical_band_2_3_scale; - src_band_height = 2; - dest_band_height = 3; - break; - case 18: - /* 3-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_3_5_scale; - last_vert_band_scale = vp8_last_vertical_band_3_5_scale; - src_band_height = 3; - dest_band_height = 5; - break; - case 15: - /* 1-2 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_1_2_scale; - last_vert_band_scale = vp8_last_vertical_band_1_2_scale; - src_band_height = 1; - dest_band_height = 2; - break; - case 30: - /* no scale in Width direction */ - vert_band_scale = null_scale; - last_vert_band_scale = null_scale; - src_band_height = 4; - dest_band_height = 4; - break; - default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } - - if (ratio_scalable == 0) - return ratio_scalable; - - horiz_line_scale(source, source_width, dest, dest_width); - - /* except last band */ - for (k = 0; k < 
(dest_height + dest_band_height - 1) / dest_band_height - 1; k++) { - /* scale one band horizontally */ - for (i = 1; i < src_band_height; i++) { - /* Trap case where we could read off the base of the source buffer */ - line_src = source + i * source_pitch; - - if (line_src < source_base) - line_src = source_base; - - horiz_line_scale(line_src, source_width, - dest + i * dest_pitch, dest_width); - } - - /* first line of next band */ - /* Trap case where we could read off the base of the source buffer */ - line_src = source + src_band_height * source_pitch; - - if (line_src < source_base) - line_src = source_base; - - horiz_line_scale(line_src, source_width, - dest + dest_band_height * dest_pitch, - dest_width); - - /* Vertical scaling is in place */ - vert_band_scale(dest, dest_pitch, dest_width); - - /* Next band... */ - source += src_band_height * source_pitch; - dest += dest_band_height * dest_pitch; - } - - /* scale one band horizontally */ - for (i = 1; i < src_band_height; i++) { - /* Trap case where we could read off the base of the source buffer */ - line_src = source + i * source_pitch; - - if (line_src < source_base) - line_src = source_base; - - horiz_line_scale(line_src, source_width, - dest + i * dest_pitch, - dest_width); - } - - /* Vertical scaling is in place */ - last_vert_band_scale(dest, dest_pitch, dest_width); - - return ratio_scalable; -} - -/**************************************************************************** - * - * ROUTINE : any_ratio_frame_scale - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance (NOT USED). - * unsigned char *frame_buffer : Pointer to source image. - * int YOffset : Offset from start of buffer to Y samples. - * int UVOffset : Offset from start of buffer to UV samples. - * - * OUTPUTS : None. - * - * RETURNS : int: 1 if image scaled, 0 if image could not be scaled. - * - * FUNCTION : Scale the image with changing apect ratio. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) { - int i; - int ew; - int eh; - - /* suggested scale factors */ - int hs = scale_vars->HScale; - int hr = scale_vars->HRatio; - int vs = scale_vars->VScale; - int vr = scale_vars->VRatio; - - int ratio_scalable = 1; - - int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; - int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; - int dw = scale_vars->expanded_frame_width; - int dh = scale_vars->expanded_frame_height; - YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; - - if (hr == 3) - ew = (sw + 2) / 3 * 3 * hs / hr; - else - ew = (sw + 7) / 8 * 8 * hs / hr; - - if (vr == 3) - eh = (sh + 2) / 3 * 3 * vs / vr; - else - eh = (sh + 7) / 8 * 8 * vs / vr; - - ratio_scalable = any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->y_buffer, - src_yuv_config->y_stride, sw, sh, - (unsigned char *) dst_yuv_config->y_buffer + YOffset, - dst_yuv_config->y_stride, dw, dh); - - for (i = 0; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); - - for (i = dh; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); - - if (ratio_scalable == 0) - return ratio_scalable; - - sw = (sw + 1) >> 1; - sh = (sh + 1) >> 1; - dw = (dw + 1) >> 1; - dh = (dh + 1) >> 1; - - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->u_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->u_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); - - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->v_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->v_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); - - return ratio_scalable; -} - -/**************************************************************************** - * - * ROUTINE : center_image - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Centers the image without scaling in the output buffer. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static void -center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) { - int i; - int row_offset, col_offset; - unsigned char *src_data_pointer; - unsigned char *dst_data_pointer; - - /* center values */ - row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; - col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; - - /* Y's */ - src_data_pointer = src_yuv_config->y_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; - - for (i = 0; i < src_yuv_config->y_height; i++) { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); - dst_data_pointer += dst_yuv_config->y_stride; - src_data_pointer += src_yuv_config->y_stride; - } - - row_offset /= 2; - col_offset /= 2; - - /* U's */ - src_data_pointer = src_yuv_config->u_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } - - /* V's */ - src_data_pointer = src_yuv_config->v_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } -} - -/**************************************************************************** - * - * ROUTINE : scale_or_center - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance. - * - * - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Decides to scale or center image in scale buffer for blit - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -void -vp8_yv12_scale_or_center -( - YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio -) { - /*if ( ppi->post_processing_level ) - update_umvborder ( ppi, frame_buffer );*/ - - - switch (scaling_mode) { - case SCALE_TO_FIT: - case MAINTAIN_ASPECT_RATIO: { - SCALE_VARS scale_vars; - /* center values */ -#if 1 - int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; - int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; - /*int YOffset = row * dst_yuv_config->y_width + col; - int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ - int YOffset = row * dst_yuv_config->y_stride + col; - int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); -#else - int row = (src_yuv_config->y_height - expanded_frame_height) / 2; - int col = (src_yuv_config->y_width - expanded_frame_width) / 2; - int YOffset = row * src_yuv_config->y_width + col; - int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); -#endif - - scale_vars.dst_yuv_config = dst_yuv_config; - scale_vars.src_yuv_config = src_yuv_config; - scale_vars.HScale = HScale; - scale_vars.HRatio = HRatio; - scale_vars.VScale = VScale; - scale_vars.VRatio = VRatio; - scale_vars.expanded_frame_width = expanded_frame_width; - scale_vars.expanded_frame_height = expanded_frame_height; - - /* perform center and scale */ - any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); - - break; - } - case CENTER: - center_image(src_yuv_config, dst_yuv_config); - break; - - default: - break; - } -} diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h deleted file mode 100644 index c53525271..000000000 --- a/vpx_scale/include/generic/vpxscale_arbitrary.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __VPX_SCALE_ARBITRARY_H__ -#define __VPX_SCALE_ARBITRARY_H__ - -#include "vpx_scale/yv12config.h" - -typedef struct { - int in_width; - int in_height; - - int out_width; - int out_height; - int max_usable_out_width; - - // numerator for the width and height - int nw; - int nh; - int nh_uv; - - // output to input correspondance array - short *l_w; - short *l_h; - short *l_h_uv; - - // polyphase coefficients - short *c_w; - short *c_h; - short *c_h_uv; - - // buffer for horizontal filtering. 
- unsigned char *hbuf; - unsigned char *hbuf_uv; -} BICUBIC_SCALER_STRUCT; - -int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); -int bicubic_scale(int in_width, int in_height, int in_stride, - int out_width, int out_height, int out_stride, - unsigned char *input_image, unsigned char *output_image); -void bicubic_scale_frame_reset(); -void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int new_width, int new_height); -void bicubic_coefficient_init(); -void bicubic_coefficient_destroy(); - -#endif /* __VPX_SCALE_ARBITRARY_H__ */ diff --git a/vpx_scale/include/generic/vpxscale_depricated.h b/vpx_scale/include/generic/vpxscale_depricated.h deleted file mode 100644 index 3f7fe0f04..000000000 --- a/vpx_scale/include/generic/vpxscale_depricated.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : postp.h -* -* Description : Post processor interface -* -****************************************************************************/ -#ifndef VPXSCALE_H -#define VPXSCALE_H - -extern void (*vp8_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -extern void (*vp8_last_vertical_band_4_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -extern void (*vp8_vertical_band_3_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -extern void (*vp8_last_vertical_band_3_5_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -extern void (*vp8_horizontal_line_1_2_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -extern void (*vp8_horizontal_line_3_5_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -extern void (*vp8_horizontal_line_4_5_scale)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -extern void (*vp8_vertical_band_1_2_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -extern void (*vp8_last_vertical_band_1_2_scale)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - -extern void dmachine_specific_config(int mmx_enabled, int xmm_enabled, int wmt_enabled); - -#endif diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h deleted file mode 100644 index 5581385b6..000000000 --- a/vpx_scale/scale_mode.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/**************************************************************************** -* -***************************************************************************** -*/ - -#ifndef SCALE_MODE_H -#define SCALE_MODE_H - -typedef enum { - MAINTAIN_ASPECT_RATIO = 0x0, - SCALE_TO_FIT = 0x1, - CENTER = 0x2, - OTHER = 0x3 -} SCALE_MODE; - - -#endif diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index 5e04a8090..28fcfaee0 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -1,5 +1,4 @@ SCALE_SRCS-yes += vpx_scale.mk -SCALE_SRCS-yes += scale_mode.h SCALE_SRCS-yes += yv12config.h SCALE_SRCS-yes += vpxscale.h SCALE_SRCS-yes += generic/vpxscale.c diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh index f709aa61c..e2bade077 100644 --- a/vpx_scale/vpx_scale_rtcd.sh +++ b/vpx_scale/vpx_scale_rtcd.sh @@ -7,21 +7,6 @@ forward_decls vpx_scale_forward_decls # Scaler functions if [ "CONFIG_SPATIAL_RESAMPLING" != "yes" ]; then - prototype void vp8_horizontal_line_4_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_2_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_3_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_3_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_1_2_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_last_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h index 3c2194d65..308b6aab6 100644 --- a/vpx_scale/vpxscale.h +++ b/vpx_scale/vpxscale.h @@ -14,16 +14,6 @@ #include 
"vpx_scale/yv12config.h" -extern void vp8_yv12_scale_or_center(YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio); - extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, unsigned char *temp_area, diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c index 2d96cc7c1..54f9ac069 100644 --- a/vpx_scale/win32/scaleopt.c +++ b/vpx_scale/win32/scaleopt.c @@ -18,1184 +18,14 @@ ****************************************************************************/ #include "pragmas.h" - - /**************************************************************************** * Module Statics ****************************************************************************/ -__declspec(align(16)) const static unsigned short one_fifth[] = { 51, 51, 51, 51 }; -__declspec(align(16)) const static unsigned short two_fifths[] = { 102, 102, 102, 102 }; -__declspec(align(16)) const static unsigned short three_fifths[] = { 154, 154, 154, 154 }; -__declspec(align(16)) const static unsigned short four_fifths[] = { 205, 205, 205, 205 }; __declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 }; -__declspec(align(16)) const static unsigned short four_ones[] = { 1, 1, 1, 1}; -__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 }; -__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 }; -__declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; -__declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 }; -__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 }; - - #include "vpx_scale/vpxscale.h" #include "vpx_mem/vpx_mem.h" -/**************************************************************************** - * - * ROUTINE : horizontal_line_3_5_scale_mmx - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 3 to 5 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void horizontal_line_3_5_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - (void) dest_width; - - __asm { - - push ebx - - mov esi, source - mov edi, dest - - mov ecx, source_width - lea edx, [esi+ecx-3]; - - movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx - movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 - - horiz_line_3_5_loop: - - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax - - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 - - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx - - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 - - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // - - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // - - mov [edi], ebx // writeoutput 00 xx xx xx - add esi, 3 - - add edi, 5 - paddw mm0, mm1 - - paddw mm0, mm4 - psrlw mm0, 8 - - cmp esi, edx - packuswb mm0, mm7 - - movd DWORD Ptr [edi-4], mm0 - jl horiz_line_3_5_loop - -// Exit: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax - - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 - - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx - - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 - - shl eax, 8 // eax = xx 01 02 02 - and eax, 0xffff0000 // eax = xx xx 02 02 - - or eax, ebx // eax = 01 02 02 02 - - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // - - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // - - mov [edi], ebx // writeoutput 00 xx xx xx - paddw mm0, mm1 - - paddw mm0, mm4 - psrlw mm0, 8 - - packuswb mm0, mm7 - movd DWORD Ptr [edi+1], mm0 - - pop ebx - - } - -} - - -/**************************************************************************** - * - * ROUTINE : horizontal_line_4_5_scale_mmx - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void horizontal_line_4_5_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - (void)dest_width; - - __asm { - - mov esi, source - mov edi, dest - - mov ecx, source_width - lea edx, [esi+ecx-8]; - - movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx - movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 - - horiz_line_4_5_loop: - - movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - - paddw mm0, mm1 // added round values - paddw mm0, mm4 - - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 - - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - add edi, 10 - - add esi, 8 - paddw mm2, mm3 // - - paddw mm2, mm4 // added round values - cmp esi, edx - - psrlw mm2, 8 - packuswb mm2, mm7 - - movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 - jl horiz_line_4_5_loop - -// Exit: - movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - - movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 - pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - - psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 - por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - - movq mm3, mm1 - - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - - paddw mm0, mm1 // added round values - paddw mm0, mm4 - - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - paddw mm2, mm3 // - - paddw mm2, mm4 // added round values - psrlw mm2, 8 - - packuswb mm2, mm7 - movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - - - } -} - -/**************************************************************************** - * - * ROUTINE : vertical_band_4_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 4 to 5 up-scaling of a 4 pixel high band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. 
The function also has a "C" only - * version. - * - ****************************************************************************/ -static -void vertical_band_4_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - __asm { - - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_4_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 1/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 - - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm5, two_fifths - movq mm2, mm0 // make a copy - - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths - - - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 - - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 - - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 - - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; - - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking - - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 - - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 - - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy - - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 - - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 - - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 - - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - - psrlw mm0, 8 - psrlw mm2, 8 - - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] - - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - - movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group - - movq mm5, four_fifths // mm5 = 4/5 - pmullw mm1, mm5 // d * 4/5 - - movq mm6, one_fifth // mm6 = 1/5 - movq mm2, mm0 // make a copy - - pmullw mm3, mm5 // d * 4/5 - punpcklbw mm0, mm7 // unpack low - - pmullw mm0, mm6 // an * 1/5 - punpckhbw mm2, mm7 // unpack high - - paddw mm1, mm0 // d * 4/5 + an * 1/5 - pmullw mm2, mm6 // an * 1/5 - - paddw mm3, mm2 // d * 4/5 + an * 1/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[4] - - movq QWORD ptr [edi+ecx], mm1 // write des[4] - - add edi, 8 - add esi, 8 - - sub edx, 8 - jg vs_4_5_loop - } 
-} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_4_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : None - * - * FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. - * - ****************************************************************************/ -static -void last_vertical_band_4_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - last_vs_4_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 1/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 - - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm5, two_fifths - movq mm2, mm0 // make a copy - - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths - - - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 - - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 - - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 - - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; - - movq QWORD ptr [edi+ecx], mm1 // write des[4]; - - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking - - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 - - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 - - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy - - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 - - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 - - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 - - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - - psrlw mm0, 8 - psrlw mm2, 8 - - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] - - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - add edi, 8 - add esi, 8 - - sub edx, 8 - jg last_vs_4_5_loop - } -} - -/**************************************************************************** - * - * ROUTINE : 
vertical_band_3_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. - * - ****************************************************************************/ -static -void vertical_band_3_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_3_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 2/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 - - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low - - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high - - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 - - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 - - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high - - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low - - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low - - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, four_fifths // c * 4/5 high - - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high - - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - - psrlw mm1, 8 - psrlw mm3, 8 - - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 - - psrlw mm4, 8 - psrlw mm5, 8 - - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] - - // mm0, mm2 --- Src[3] - - pxor mm7, mm7 // clear mm7 for unpacking - movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - - movq mm5, three_fifths // mm5 = 3/5 - pmullw mm0, mm5 // d * 3/5 - - movq mm6, two_fifths // mm6 = 2/5 - movq mm3, mm1 // make a copy - - pmullw mm2, mm5 // d * 3/5 - punpcklbw mm1, mm7 // unpack low - - pmullw mm1, mm6 // an * 2/5 - punpckhbw mm3, mm7 // unpack high - - paddw mm0, mm1 // d * 3/5 + an * 2/5 - pmullw mm3, mm6 // an * 2/5 - - paddw mm2, mm3 // d * 3/5 + an * 2/5 - paddw mm0, round_values // + 128 
- - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des[4] - - movq QWORD ptr [edi+ecx], mm0 // write des[4] - - add edi, 8 - add esi, 8 - - sub edx, 8 - jg vs_3_5_loop - } -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_3_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. - * - ****************************************************************************/ -static -void last_vertical_band_3_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - - last_vs_3_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 2/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 - - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low - - movq QWORD ptr [edi+ecx], mm0 // write des[4] - - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high - - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 - - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 - - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high - - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low - - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low - - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, four_fifths // c * 4/5 high - - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high - - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - - psrlw mm1, 8 - psrlw mm3, 8 - - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 - - psrlw mm4, 8 - psrlw mm5, 8 - - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] - - // mm0, mm2 --- Src[3] - - add edi, 8 - add esi, 8 - - sub edx, 8 - jg last_vs_3_5_loop - } -} - 
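/* For reference, the arithmetic behind the 3-to-5 vertical band scalers
 * above, as a plain-C sketch. This is an illustration only, not the
 * library's "C only version" (vp8cx_vertical_band_3_5_scale_c) that the
 * comments refer to: the signature is hypothetical, exact-rational
 * rounding stands in for the MMX fixed-point weights (51/102/154/205
 * with a +128 bias and >>8), and the in-place band layout is omitted.
 * a, b, c are the band's three source rows; d is the first row of the
 * band below; row 0 of the output band keeps a unchanged. */
static void vertical_band_3_5_sketch(const unsigned char *a,
                                     const unsigned char *b,
                                     const unsigned char *c,
                                     const unsigned char *d,
                                     unsigned char *dest,
                                     unsigned int dest_pitch,
                                     unsigned int width) {
  unsigned int j;
  for (j = 0; j < width; j++) {
    /* Output rows sample the source at steps of 3/5 of a source row. */
    dest[1 * dest_pitch + j] = (unsigned char)((2 * a[j] + 3 * b[j] + 2) / 5);
    dest[2 * dest_pitch + j] = (unsigned char)((4 * b[j] + 1 * c[j] + 2) / 5);
    dest[3 * dest_pitch + j] = (unsigned char)((1 * b[j] + 4 * c[j] + 2) / 5);
    dest[4 * dest_pitch + j] = (unsigned char)((3 * c[j] + 2 * d[j] + 2) / 5);
  }
}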
-/**************************************************************************** - * - * ROUTINE : vertical_band_1_2_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of a band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. - * - ****************************************************************************/ -static -void vertical_band_1_2_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - __asm { - - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_1_2_loop: - - movq mm0, [esi] // get Src[0] - movq mm1, [esi + ecx * 2] // get Src[1] - - movq mm2, mm0 // make copy before unpack - movq mm3, mm1 // make copy before unpack - - punpcklbw mm0, mm7 // low Src[0] - movq mm6, four_ones // mm6= 1, 1, 1, 1 - - punpcklbw mm1, mm7 // low Src[1] - paddw mm0, mm1 // low (a + b) - - punpckhbw mm2, mm7 // high Src[0] - paddw mm0, mm6 // low (a + b + 1) - - punpckhbw mm3, mm7 - paddw mm2, mm3 // high (a + b ) - - psraw mm0, 1 // low (a + b +1 )/2 - paddw mm2, mm6 // high (a + b + 1) - - psraw mm2, 1 // high (a + b + 1)/2 - packuswb mm0, mm2 // pack results - - movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 - - sub edx, 8 - jg vs_1_2_loop - } - -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_1_2_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. - * - ****************************************************************************/ -static -void last_vertical_band_1_2_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) { - __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - mov edx, dest_width // Loop counter - - last_vs_1_2_loop: - - movq mm0, [esi] // get Src[0] - movq [esi+ecx], mm0 // write out eight bytes - - add esi, 8 - sub edx, 8 - - jg last_vs_1_2_loop - } -} - -/**************************************************************************** - * - * ROUTINE : horizontal_line_1_2_scale - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void horizontal_line_1_2_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) { - (void) dest_width; - - __asm { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - movq mm6, four_ones - - mov ecx, source_width - - hs_1_2_loop: - - movq mm0, [esi] - movq mm1, [esi+1] - - movq mm2, mm0 - movq mm3, mm1 - - movq mm4, mm0 - punpcklbw mm0, mm7 - - punpcklbw mm1, mm7 - paddw mm0, mm1 - - paddw mm0, mm6 - punpckhbw mm2, mm7 - - punpckhbw mm3, mm7 - paddw mm2, mm3 - - paddw mm2, mm6 - psraw mm0, 1 - - psraw mm2, 1 - packuswb mm0, mm2 - - movq mm2, mm4 - punpcklbw mm2, mm0 - - movq [edi], mm2 - punpckhbw mm4, mm0 - - movq [edi+8], mm4 - add esi, 8 - - add edi, 16 - sub ecx, 8 - - cmp ecx, 8 - jg hs_1_2_loop - -// last eight pixel - - movq mm0, [esi] - movq mm1, mm0 - - movq mm2, mm0 - movq mm3, mm1 - - psrlq mm1, 8 - psrlq mm3, 56 - - psllq mm3, 56 - por mm1, mm3 - - movq mm3, mm1 - movq mm4, mm0 - - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - - paddw mm0, mm1 - paddw mm0, mm6 - - punpckhbw mm2, mm7 - punpckhbw mm3, mm7 - - paddw mm2, mm3 - paddw mm2, mm6 - - psraw mm0, 1 - psraw mm2, 1 - - packuswb mm0, mm2 - movq mm2, mm4 - - punpcklbw mm2, mm0 - movq [edi], mm2 - - punpckhbw mm4, mm0 - movq [edi+8], mm4 - } -} - - - - - __declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 }; __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, 64 }; @@ -1685,25 +515,6 @@ void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch void register_mmxscalers(void) { - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; - - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - - - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; @@ -1711,8 +522,4 @@ register_mmxscalers(void) { vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; - - - - } diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c deleted file mode 100644 index 98913d19a..000000000 --- a/vpx_scale/win32/scalesystemdependent.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : system_dependent.c -* -* Description : Miscellaneous system dependent functions -* -****************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "vpx_scale/vpxscale.h" -#include "cpuidlib.h" - -/**************************************************************************** -* Imports -*****************************************************************************/ -extern void register_generic_scalers(void); -extern void register_mmxscalers(void); - -/**************************************************************************** - * - * ROUTINE : post_proc_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Checks for machine specifc features such as MMX support - * sets appropriate flags and function pointers. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void -vp8_scale_machine_specific_config(void) { - // If MMX supported then set to use MMX versions of functions else - // use original 'C' versions. - int mmx_enabled; - int xmm_enabled; - int wmt_enabled; - - vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - - if (mmx_enabled || xmm_enabled || wmt_enabled) { - register_mmxscalers(); - } else { - vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; - vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; - vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; - vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - - - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - - } -} From 806d05e1a81b3a04bdab7af08fb032f57b815886 Mon Sep 17 00:00:00 2001 From: Yaowu Xu 
Date: Mon, 3 Dec 2012 14:53:45 -0800 Subject: [PATCH 04/77] merged optimize_b_16x16() into optimize_b() The commit changed the trellis quantization function optimize_b() to work for MBs using all transform sizes, and eliminated optimize_b_16x16(), which handled only MBs using the 16x16 transform. Change-Id: I3fa650587ab5198ed16315b38754783a72b33ba2 --- vp9/encoder/vp9_encodemb.c | 238 ++++--------------------------------- 1 file changed, 24 insertions(+), 214 deletions(-) diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 3596d9ffa..091f2f0fe 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -302,36 +302,23 @@ static const int plane_rd_mult[4] = { static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int tx_size) { - BLOCK *b; - BLOCKD *d; - vp9_token_state tokens[65][2]; - uint64_t best_mask[2]; - const short *dequant_ptr; - const short *coeff_ptr; - short *qcoeff_ptr; - short *dqcoeff_ptr; - int eob; - int i0; - int rc; - int x; - int sz = 0; - int next; - int rdmult; - int rddiv; - int final_eob; - int64_t rd_cost0, rd_cost1; - int rate0, rate1; - int error0, error1; - int t0, t1; - int best; - int band; - int pt; + BLOCK *b = &mb->block[i]; + BLOCKD *d = &mb->e_mbd.block[i]; + vp9_token_state tokens[257][2]; + unsigned best_index[257][2]; + const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + int eob = d->eob, final_eob, sz = 0; + int i0 = (type == PLANE_TYPE_Y_NO_DC); + int rc, x, next; + int64_t rdmult, rddiv, rd_cost0, rd_cost1; + int rate0, rate1, error0, error1, t0, t1; + int best, band, pt; int err_mult = plane_rd_mult[type]; int default_eob; int const *scan, *bands; - b = &mb->block[i]; - d = &mb->e_mbd.block[i]; switch (tx_size) { default: case TX_4X4: @@ -366,21 +353,19 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, bands = vp9_coef_bands_8x8; default_eob = 64; break; + case TX_16X16: + scan = vp9_default_zig_zag1d_16x16; + bands = vp9_coef_bands_16x16; + default_eob = 256; + break; } - dequant_ptr = d->dequant; - coeff_ptr = b->coeff; - qcoeff_ptr = d->qcoeff; - dqcoeff_ptr = d->dqcoeff; - i0 = (type == PLANE_TYPE_Y_NO_DC); - eob = d->eob; - /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) rdmult = (rdmult * 9) >> 4; rddiv = mb->rddiv; - best_mask[0] = best_mask[1] = 0; + memset(best_index, 0, sizeof(best_index)); /* Initialize the sentinel node of the trellis. */ tokens[eob][0].rate = 0; tokens[eob][0].error = 0; @@ -390,9 +375,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, *(tokens[eob] + 1) = *(tokens[eob] + 0); next = eob; for (i = eob; i-- > i0;) { - int base_bits; - int d2; - int dx; + int base_bits, d2, dx; rc = scan[i]; x = qcoeff_ptr[rc]; @@ -425,7 +408,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, tokens[i][0].next = next; tokens[i][0].token = t0; tokens[i][0].qc = x; - best_mask[0] |= best << i; + best_index[i][0] = best; /* Evaluate the second possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; @@ -481,7 +464,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, tokens[i][1].next = next; tokens[i][1].token = best ? t1 : t0; tokens[i][1].qc = x; - best_mask[1] |= best << i; + best_index[i][1] = best; /* Finally, make this the new head of the trellis.
*/ next = i; } @@ -528,7 +511,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]); next = tokens[i][best].next; - best = (best_mask[best] >> i) & 1; + best = best_index[i][best]; } final_eob++; @@ -726,179 +709,6 @@ static void optimize_mb_8x8(MACROBLOCK *x) { vp9_optimize_mbuv_8x8(x); } -static void optimize_b_16x16(MACROBLOCK *mb, int i, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - BLOCK *b = &mb->block[i]; - BLOCKD *d = &mb->e_mbd.block[i]; - vp9_token_state tokens[257][2]; - unsigned best_index[257][2]; - const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; - short *qcoeff_ptr = qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = dqcoeff_ptr = d->dqcoeff; - int eob = d->eob, final_eob, sz = 0; - int rc, x, next; - int64_t rdmult, rddiv, rd_cost0, rd_cost1; - int rate0, rate1, error0, error1, t0, t1; - int best, band, pt; - int err_mult = plane_rd_mult[type]; - - /* Now set up a Viterbi trellis to evaluate alternative roundings. */ - rdmult = mb->rdmult * err_mult; - if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) - rdmult = (rdmult * 9)>>4; - rddiv = mb->rddiv; - memset(best_index, 0, sizeof(best_index)); - /* Initialize the sentinel node of the trellis. */ - tokens[eob][0].rate = 0; - tokens[eob][0].error = 0; - tokens[eob][0].next = 256; - tokens[eob][0].token = DCT_EOB_TOKEN; - tokens[eob][0].qc = 0; - *(tokens[eob] + 1) = *(tokens[eob] + 0); - next = eob; - for (i = eob; i-- > 0;) { - int base_bits, d2, dx; - - rc = vp9_default_zig_zag1d_16x16[i]; - x = qcoeff_ptr[rc]; - /* Only add a trellis state for non-zero coefficients. */ - if (x) { - int shortcut = 0; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - /* Evaluate the first possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - t0 = (vp9_dct_value_tokens_ptr + x)->Token; - /* Consider both possible successor states. */ - if (next < 256) { - band = vp9_coef_bands_16x16[i + 1]; - pt = vp9_prev_token_class[t0]; - rate0 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][0].token]; - rate1 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][1].token]; - } - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); - dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; - d2 = dx*dx; - tokens[i][0].rate = base_bits + (best ? rate1 : rate0); - tokens[i][0].error = d2 + (best ? error1 : error0); - tokens[i][0].next = next; - tokens[i][0].token = t0; - tokens[i][0].qc = x; - best_index[i][0] = best; - /* Evaluate the second possibility for this state. */ - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - - if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) && - (abs(x)*dequant_ptr[rc!=0]Token; - if (next < 256) { - band = vp9_coef_bands_16x16[i + 1]; - if (t0 != DCT_EOB_TOKEN) { - pt = vp9_prev_token_class[t0]; - rate0 += mb->token_costs[TX_16X16][type][band][pt] - [tokens[next][0].token]; - } - if (t1!=DCT_EOB_TOKEN) { - pt = vp9_prev_token_class[t1]; - rate1 += mb->token_costs[TX_16X16][type][band][pt] - [tokens[next][1].token]; - } - } - UPDATE_RD_COST(); - /* And pick the best. */ - best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); - - if(shortcut) { - dx -= (dequant_ptr[rc!=0] + sz) ^ sz; - d2 = dx*dx; - } - tokens[i][1].rate = base_bits + (best ? rate1 : rate0); - tokens[i][1].error = d2 + (best ? error1 : error0); - tokens[i][1].next = next; - tokens[i][1].token = best ? 
t1 : t0; - tokens[i][1].qc = x; - best_index[i][1] = best; - /* Finally, make this the new head of the trellis. */ - next = i; - } - /* There's no choice to make for a zero coefficient, so we don't - * add a new trellis node, but we do need to update the costs. - */ - else { - band = vp9_coef_bands_16x16[i + 1]; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - /* Update the cost of each path if we're past the EOB token. */ - if (t0 != DCT_EOB_TOKEN) { - tokens[next][0].rate += mb->token_costs[TX_16X16][type][band][0][t0]; - tokens[next][0].token = ZERO_TOKEN; - } - if (t1 != DCT_EOB_TOKEN) { - tokens[next][1].rate += mb->token_costs[TX_16X16][type][band][0][t1]; - tokens[next][1].token = ZERO_TOKEN; - } - /* Don't update next, because we didn't add a new node. */ - } - } - - /* Now pick the best path through the whole trellis. */ - band = vp9_coef_bands_16x16[i + 1]; - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - rate0 = tokens[next][0].rate; - rate1 = tokens[next][1].rate; - error0 = tokens[next][0].error; - error1 = tokens[next][1].error; - t0 = tokens[next][0].token; - t1 = tokens[next][1].token; - rate0 += mb->token_costs[TX_16X16][type][band][pt][t0]; - rate1 += mb->token_costs[TX_16X16][type][band][pt][t1]; - UPDATE_RD_COST(); - best = rd_cost1 < rd_cost0; - final_eob = -1; - - for (i = next; i < eob; i = next) { - x = tokens[i][best].qc; - if (x) - final_eob = i; - rc = vp9_default_zig_zag1d_16x16[i]; - qcoeff_ptr[rc] = x; - dqcoeff_ptr[rc] = (x * dequant_ptr[rc!=0]); - - next = tokens[i][best].next; - best = best_index[i][best]; - } - final_eob++; - - d->eob = final_eob; - *a = *l = (d->eob > !type); -} - void vp9_optimize_mby_16x16(MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; @@ -911,7 +721,7 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) { ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl); + optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); } static void optimize_mb_16x16(MACROBLOCK *x) { From 4cc657ec6e17ab975ebba43034d6de5aa8e413ee Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Tue, 4 Dec 2012 17:21:05 +0000 Subject: [PATCH 05/77] Change to MV reference search. This patch reduces the CPU cost of the MV ref search by only allowing insertion of candidates that would rank in the current top 4. This could alter the outcome, and it slightly favors near candidates, which are tested first; but it also limits the worst-case loop count to 4 and means that in many cases the search drops out early and no insert happens.
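As a rough sketch of the bounded insert (simplified from the addmv_and_shuffle() change below; the shuffle of the winning candidate toward the head of the list and some index bookkeeping are omitted, and int_mv / MAX_MV_REF_CANDIDATES are as defined in the tree):

static void add_mv_candidate_sketch(int_mv *mv_list, int *mv_scores,
                                    int *index, int_mv candidate_mv,
                                    int weight) {
  /* New entries may only land in the last slot of the current top
   * MAX_MV_REF_CANDIDATES (4), so the duplicate scan is bounded. */
  int insert_point = (*index < MAX_MV_REF_CANDIDATES - 1)
                         ? *index : MAX_MV_REF_CANDIDATES - 1;
  int i = insert_point;
  int duplicate_found = 0;

  while (i > 0) {
    i--;
    if (candidate_mv.as_int == mv_list[i].as_int) {
      duplicate_found = 1;
      mv_scores[i] += weight;  /* duplicate: just raise its score */
      break;
    }
  }

  /* Insert only when the newcomer outscores the weakest retained slot;
   * low-weight candidates drop out without any list reshuffling. */
  if (!duplicate_found && weight > mv_scores[insert_point]) {
    mv_list[insert_point].as_int = candidate_mv.as_int;
    mv_scores[insert_point] = weight;
    if (*index < MAX_MV_REF_CANDIDATES) (*index)++;
  }
}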
Change-Id: Idd795a825f9fd681f30f4fcd550c34c38939e113 --- vp9/common/vp9_blockd.h | 3 ++- vp9/common/vp9_findnearmv.c | 11 ++++---- vp9/common/vp9_mvref_common.c | 49 ++++++++++++++++++----------------- vp9/encoder/vp9_block.h | 2 +- vp9/encoder/vp9_encodeframe.c | 3 +-- vp9/encoder/vp9_onyx_if.c | 2 +- vp9/encoder/vp9_onyx_int.h | 2 +- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 17e847462..27ef22fff 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -45,6 +45,7 @@ void vpx_log(const char *format, ...); #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 #define MAX_MV_REFS 9 +#define MAX_MV_REF_CANDIDATES 4 typedef struct { int r, c; @@ -238,7 +239,7 @@ typedef struct { MV_REFERENCE_FRAME ref_frame, second_ref_frame; TX_SIZE txfm_size; int_mv mv[2]; // for each reference frame used - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; int_mv best_mv, best_second_mv; #if CONFIG_NEW_MVREF int best_index, best_second_index; diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 02c28fbde..4fd468973 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -139,8 +139,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, #if CONFIG_SUBPELREFMV unsigned int sse; #endif - unsigned int ref_scores[MAX_MV_REFS] = {0}; - int_mv sorted_mvs[MAX_MV_REFS]; + unsigned int ref_scores[MAX_MV_REF_CANDIDATES] = {0}; + int_mv sorted_mvs[MAX_MV_REF_CANDIDATES]; int zero_seen = FALSE; // Default all to 0,0 if nothing else available @@ -159,9 +159,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, left_ref = ref_y_buffer - 3; #endif - //for(i = 0; i < MAX_MV_REFS; ++i) { - // Limit search to the predicted best 4 - for(i = 0; i < 4; ++i) { + // Limit search to the predicted best few candidates + for(i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { int_mv this_mv; int offset = 0; int row_offset, col_offset; @@ -268,7 +267,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, } // Make sure all the candidates are properly clamped etc - for (i = 0; i < 4; ++i) { + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { lower_mv_precision(&sorted_mvs[i], xd->allow_high_precision_mv); clamp_mv2(&sorted_mvs[i], xd); } diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index bf60630e3..bbed0554f 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -170,14 +170,20 @@ static void addmv_and_shuffle( int weight ) { - int i = *index; + int i; + int insert_point; int duplicate_found = FALSE; - // Check for duplicates. If there is one increment its score. - // Duplicate defined as being the same full pel vector with rounding. + // Check for duplicates. If there is one increase its score. + // We only compare vs the current top candidates. + insert_point = (*index < (MAX_MV_REF_CANDIDATES - 1)) + ? *index : (MAX_MV_REF_CANDIDATES - 1); + + i = insert_point; + if (*index > i) + i++; while (i > 0) { i--; - if (candidate_mv.as_int == mv_list[i].as_int) { duplicate_found = TRUE; mv_scores[i] += weight; @@ -185,11 +191,13 @@ static void addmv_and_shuffle( } } - // If no duplicate was found add the new vector and give it a weight - if (!duplicate_found) { - mv_list[*index].as_int = candidate_mv.as_int; - mv_scores[*index] = weight; - i = *index; + // If no duplicate and the new candidate is good enough then add it. 
+ if (!duplicate_found ) { + if (weight > mv_scores[insert_point]) { + mv_list[insert_point].as_int = candidate_mv.as_int; + mv_scores[insert_point] = weight; + i = insert_point; + } (*index)++; } @@ -224,12 +232,12 @@ void vp9_find_mv_refs( int i; MODE_INFO *candidate_mi; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int_mv candidate_mvs[MAX_MV_REFS]; + int_mv candidate_mvs[MAX_MV_REF_CANDIDATES]; int_mv c_refmv; - MV_REFERENCE_FRAME c_ref_frame; int_mv c2_refmv; + MV_REFERENCE_FRAME c_ref_frame; MV_REFERENCE_FRAME c2_ref_frame; - int candidate_scores[MAX_MV_REFS]; + int candidate_scores[MAX_MV_REF_CANDIDATES]; int index = 0; int split_count = 0; int ref_weight = 0; @@ -238,8 +246,8 @@ void vp9_find_mv_refs( int *ref_distance_weight; // Blank the reference vector lists and other local structures. - vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REFS); - vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REFS); + vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES); + vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES); vpx_memset(candidate_scores, 0, sizeof(candidate_scores)); #if CONFIG_SUPERBLOCKS @@ -349,11 +357,6 @@ void vp9_find_mv_refs( } } - // Make sure we are able to add 0,0 - if (index > (MAX_MV_REFS - 1)) { - index = (MAX_MV_REFS - 1); - } - // Define inter mode coding context. // 0,0 was best if (candidate_mvs[0].as_int == 0) { @@ -383,14 +386,12 @@ void vp9_find_mv_refs( } // 0,0 is always a valid reference. - for (i = 0; i < index; ++i) { + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { if (candidate_mvs[i].as_int == 0) break; } - if (i == index) { - c_refmv.as_int = 0; - addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c_refmv, candidate_scores[3]+1 ); + if (i == MAX_MV_REF_CANDIDATES) { + candidate_mvs[MAX_MV_REF_CANDIDATES-1].as_int = 0; } // Copy over the candidate list. diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index d5bf89c7d..4669d2ed6 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -70,7 +70,7 @@ typedef struct { PARTITION_INFO partition_info; int_mv best_ref_mv; int_mv second_best_ref_mv; - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; int rate; int distortion; int64_t intra_error; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index b7f8cf49f..76176be65 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -395,8 +395,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, vp9_mv_bit_cost(&target_mv, &mv_ref_list[0], x->nmvjointcost, x->mvcost, 96, xd->allow_high_precision_mv); - // Use 4 for now : for (i = 1; i < MAX_MV_REFS; ++i ) { - for (i = 1; i < 4; ++i) { + for (i = 1; i < MAX_MV_REF_CANDIDATES; ++i) { // If we see a 0,0 reference vector for a second time we have reached // the end of the list of valid candidate vectors. 
if (!mv_ref_list[i].as_int) { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 8cabed9c9..448b56a08 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -3821,7 +3821,7 @@ static void encode_frame_to_data_rate { FILE *f = fopen("mv_ref_dist.stt", "a"); unsigned int i; - for (i = 0; i < MAX_MV_REFS; ++i) { + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { fprintf(f, "%10d", cpi->best_ref_index_counts[0][i]); } fprintf(f, "\n" ); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 8180d16de..511e62f1c 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -790,7 +790,7 @@ typedef struct VP9_COMP { unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS]; #if CONFIG_NEW_MVREF - unsigned int best_ref_index_counts[MAX_REF_FRAMES][MAX_MV_REFS]; + unsigned int best_ref_index_counts[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; #endif } VP9_COMP; From 1009f76566524e88a322a8373ccc848128f8c201 Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 3 Dec 2012 14:19:49 -0800 Subject: [PATCH 06/77] Use 'vpx_scale' consistently Change-Id: I178352813d2b8702d081caf405de9dbad9af2cc3 --- vp8/common/postproc.c | 2 +- vp8/decoder/decodframe.c | 2 +- vp8/decoder/onyxd_if.c | 2 +- vp8/encoder/firstpass.c | 2 +- vp8/encoder/onyx_if.c | 2 +- vp8/encoder/picklpf.c | 2 +- vp8/encoder/temporal_filter.c | 2 +- vp9/common/vp9_postproc.c | 2 +- vp9/decoder/vp9_decodframe.c | 2 +- vp9/decoder/vp9_onyxd_if.c | 2 +- vp9/encoder/vp9_firstpass.c | 2 +- vp9/encoder/vp9_onyx_if.c | 2 +- vp9/encoder/vp9_picklpf.c | 2 +- vp9/encoder/vp9_temporal_filter.c | 2 +- vpx_scale/generic/gen_scalers.c | 2 +- vpx_scale/generic/{vpxscale.c => vpx_scale.c} | 0 vpx_scale/generic/yv12extend.c | 2 +- vpx_scale/{vpxscale.h => vpx_scale.h} | 0 vpx_scale/vpx_scale.mk | 4 ++-- vpx_scale/win32/scaleopt.c | 2 +- 20 files changed, 19 insertions(+), 19 deletions(-) rename vpx_scale/generic/{vpxscale.c => vpx_scale.c} (100%) rename vpx_scale/{vpxscale.h => vpx_scale.h} (100%) diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index c6442c92e..fd313b6fe 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -15,7 +15,7 @@ #include "vpx_scale/yv12config.h" #include "postproc.h" #include "common.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "systemdependent.h" #include diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index a22f37246..463211810 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -21,7 +21,7 @@ #include "vp8/common/alloccommon.h" #include "vp8/common/entropymode.h" #include "vp8/common/quant_common.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/setupintrarecon.h" #include "decodemv.h" diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 459e34e06..24fc8783a 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -26,7 +26,7 @@ #include "vp8/common/quant_common.h" #include "./vpx_scale_rtcd.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/systemdependent.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index e5306a425..370e82293 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -21,7 +21,7 @@ #include "vp8/common/systemdependent.h" #include "mcomp.h" #include "firstpass.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" 
#include "encodemb.h" #include "vp8/common/extend.h" #include "vpx_mem/vpx_mem.h" diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index e7d22701e..575033c2d 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -20,7 +20,7 @@ #include "mcomp.h" #include "firstpass.h" #include "psnr.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/extend.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index 8114ec33a..841e1e452 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -14,7 +14,7 @@ #include "onyx_int.h" #include "quantize.h" #include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/alloccommon.h" #include "vp8/common/loopfilter.h" #if ARCH_ARM diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index b83ae89ab..7e3af71ec 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -17,7 +17,7 @@ #include "mcomp.h" #include "firstpass.h" #include "psnr.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/extend.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index f00edf00d..820630251 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -13,7 +13,7 @@ #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_postproc.h" #include "vp9/common/vp9_textblit.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_systemdependent.h" #include "./vp9_rtcd.h" #include "./vpx_scale_rtcd.h" diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index f95a83afa..76349ad86 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -20,7 +20,7 @@ #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_quant_common.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_setupintrarecon.h" #include "vp9/decoder/vp9_decodemv.h" diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index bad43cabd..9c0b581be 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -23,7 +23,7 @@ #include #include "vp9/common/vp9_quant_common.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_systemdependent.h" #include "vpx_ports/vpx_timer.h" #include "vp9/decoder/vp9_decodframe.h" diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 71a8cefe6..e62eeedaf 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -17,7 +17,7 @@ #include "vp9/common/vp9_setupintrarecon.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_firstpass.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/common/vp9_extend.h" diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 448b56a08..0fe7a14b6 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -18,7 +18,7 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_psnr.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_extend.h" #include "vp9/encoder/vp9_ratectrl.h" #include 
"vp9/common/vp9_quant_common.h" diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 90ea6f14a..865c45e82 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -14,7 +14,7 @@ #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" #include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" #include "./vpx_scale_rtcd.h" diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 3a6844316..89d6b44a0 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -17,7 +17,7 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_psnr.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_extend.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/common/vp9_quant_common.h" diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c index febe97d94..5f355c5a6 100644 --- a/vpx_scale/generic/gen_scalers.c +++ b/vpx_scale/generic/gen_scalers.c @@ -9,7 +9,7 @@ */ -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vpx_mem/vpx_mem.h" /**************************************************************************** * Imports diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpx_scale.c similarity index 100% rename from vpx_scale/generic/vpxscale.c rename to vpx_scale/generic/vpx_scale.c diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c index 247078c0c..5a427356b 100644 --- a/vpx_scale/generic/yv12extend.c +++ b/vpx_scale/generic/yv12extend.c @@ -11,7 +11,7 @@ #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" /**************************************************************************** * Exports diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpx_scale.h similarity index 100% rename from vpx_scale/vpxscale.h rename to vpx_scale/vpx_scale.h diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index 3d759327a..11d3fd96d 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -1,7 +1,7 @@ SCALE_SRCS-yes += vpx_scale.mk SCALE_SRCS-yes += yv12config.h -SCALE_SRCS-yes += vpxscale.h -SCALE_SRCS-yes += generic/vpxscale.c +SCALE_SRCS-yes += vpx_scale.h +SCALE_SRCS-yes += generic/vpx_scale.c SCALE_SRCS-yes += generic/yv12config.c SCALE_SRCS-yes += generic/yv12extend.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c index 54f9ac069..4336ecea3 100644 --- a/vpx_scale/win32/scaleopt.c +++ b/vpx_scale/win32/scaleopt.c @@ -23,7 +23,7 @@ ****************************************************************************/ __declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 }; -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vpx_mem/vpx_mem.h" __declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 }; From 11a84b25b56c01f4516c560712c370d7a3c40f37 Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 3 Dec 2012 15:08:17 -0800 Subject: [PATCH 07/77] Remove last duck_ functions Change-Id: I5fbcd2006d05bfe841f3c7af9c1aeb2cb83b3149 --- vpx_mem/vpx_mem.h | 9 +-------- vpx_scale/generic/vpx_scale.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 16 deletions(-) diff 
--git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h index 3246e9298..c7321a95e 100644 --- a/vpx_mem/vpx_mem.h +++ b/vpx_mem/vpx_mem.h @@ -101,14 +101,7 @@ extern "C" { /* some defines for backward compatibility */ #define DMEM_GENERAL 0 -#define duck_memalign(X,Y,Z) vpx_memalign(X,Y) -#define duck_malloc(X,Y) vpx_malloc(X) -#define duck_calloc(X,Y,Z) vpx_calloc(X,Y) -#define duck_realloc vpx_realloc -#define duck_free vpx_free -#define duck_memcpy vpx_memcpy -#define duck_memmove vpx_memmove -#define duck_memset vpx_memset +// (*)< #if REPLACE_BUILTIN_FUNCTIONS # ifndef __VPX_MEM_C__ diff --git a/vpx_scale/generic/vpx_scale.c b/vpx_scale/generic/vpx_scale.c index 49cdb7bc7..c83c3b68f 100644 --- a/vpx_scale/generic/vpx_scale.c +++ b/vpx_scale/generic/vpx_scale.c @@ -432,7 +432,7 @@ void Scale2D temp_area + i * dest_pitch, 1, hratio, dest_width); } else { /* Duplicate the last row */ /* copy temp_area row 0 over from last row in the past */ - duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); + vpx_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); } } @@ -443,7 +443,7 @@ void Scale2D } /* copy temp_area row 0 over from last row in the past */ - duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); + vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); /* move to the next band */ source += source_band_height * source_pitch; @@ -498,11 +498,11 @@ void vp8_scale_frame if (dw < (int)dst->y_width) for (i = 0; i < dh; i++) - duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); + vpx_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); if (dh < (int)dst->y_height) for (i = dh - 1; i < (int)dst->y_height; i++) - duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); + vpx_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, @@ -510,11 +510,11 @@ void vp8_scale_frame if (dw / 2 < (int)dst->uv_width) for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + vpx_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); if (dh / 2 < (int)dst->uv_height) for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + vpx_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, @@ -522,9 +522,9 @@ void vp8_scale_frame if (dw / 2 < (int)dst->uv_width) for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + vpx_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); if (dh / 2 < (int) dst->uv_height) for (i = dh / 2 - 1; i < 
(int)dst->y_height / 2; i++) - duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + vpx_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); } From a36d9a4a150c2f7e7eaa825d7ca0289aeac6c5b1 Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 3 Dec 2012 16:23:49 -0800 Subject: [PATCH 08/77] Move vp8_scale_frame to vpx namespace Change-Id: I92d613e89c8f1174eca0789116120bfa20c25c28 --- vp8/encoder/onyx_if.c | 2 +- vpx_scale/generic/vpx_scale.c | 4 ++-- vpx_scale/vpx_scale.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 575033c2d..aa09a1e3e 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -2599,7 +2599,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) Scale2Ratio(cm->horiz_scale, &hr, &hs); Scale2Ratio(cm->vert_scale, &vr, &vs); - vp8_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer, + vpx_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer, tmp_height, hs, hr, vs, vr, 0); vp8_yv12_extend_frame_borders(&cpi->scaled_source); diff --git a/vpx_scale/generic/vpx_scale.c b/vpx_scale/generic/vpx_scale.c index c83c3b68f..8044d2ad7 100644 --- a/vpx_scale/generic/vpx_scale.c +++ b/vpx_scale/generic/vpx_scale.c @@ -453,7 +453,7 @@ void Scale2D /**************************************************************************** * - * ROUTINE : + * ROUTINE : vpx_scale_frame * * INPUTS : YV12_BUFFER_CONFIG *src : Pointer to frame to be scaled. * YV12_BUFFER_CONFIG *dst : Pointer to buffer to hold scaled frame. @@ -475,7 +475,7 @@ void Scale2D * caching. * ****************************************************************************/ -void vp8_scale_frame +void vpx_scale_frame ( YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, diff --git a/vpx_scale/vpx_scale.h b/vpx_scale/vpx_scale.h index 308b6aab6..9ddf62efd 100644 --- a/vpx_scale/vpx_scale.h +++ b/vpx_scale/vpx_scale.h @@ -14,7 +14,7 @@ #include "vpx_scale/yv12config.h" -extern void vp8_scale_frame(YV12_BUFFER_CONFIG *src, +extern void vpx_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, unsigned char *temp_area, unsigned char temp_height, From c456b35fdf1b1e9fd3c964b822e9de05437544e2 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 7 Dec 2012 14:45:05 -0800 Subject: [PATCH 09/77] 32x32 transform for superblocks. This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds code all over the place to wrap that in the bitstream/encoder/decoder/RD. Some implementation notes (these probably need careful review): - token range is extended by 1 bit, since the value range out of this transform is [-16384,16383]. - the coefficients coming out of the FDCT are manually scaled back by 1 bit, or else they won't fit in int16_t (they are 17 bits; see the range sketch after these notes). Because of this, the RD error scoring does not right-shift the MSE score by two (unlike for 4x4/8x8/16x16). - to compensate for this loss in precision, the quantizer is halved also. This is currently a little hacky. - FDCT and IDCT are double-only right now. Needs a fixed-point impl. - There are no default probabilities for the 32x32 transform yet; I'm simply using the 16x16 luma ones. A future commit will add newly generated probabilities for all transforms. - No ADST version. I don't think we'll add one for this level; if an ADST is desired, transform-size selection can scale back to 16x16 or lower, and use an ADST at that level.
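A quick numeric check of that bit-width claim, as a minimal stand-alone sketch (illustration only, not code from this patch). It assumes the scaling convention of the reference transform added in test/dct32x32_test.cc, where each 1-D pass scales the DC term by 1/sqrt(2) and the row pass divides everything by 4:

#include <stdio.h>

int main(void) {
  /* A constant +255 residue block: the top of the [-255, 255] input
   * range exercised by the new tests, and the worst case for DC. */
  const long sum = 255L * 32 * 32;  /* raw 2-D pixel sum: 261120 */
  const long dc = sum / 2 / 4;      /* (1/sqrt(2))^2 = 1/2, then /4: 32640 */
  const long unscaled = dc * 2;     /* without the 1-bit scale-back: 65280 */
  printf("scaled DC   = %ld (fits int16_t: %s)\n", dc,
         dc <= 32767 ? "yes" : "no");
  printf("unscaled DC = %ld (a signed 17-bit value)\n", unscaled);
  return 0;
}

The halved quantizer mentioned above then absorbs the factor of 2 that the scale-back removes from the coefficients.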
Additional notes specific to Debargha's DWT/DCT hybrid: - coefficient scale is different for the top/left 16x16 (DCT-over-DWT) block than for the rest (DWT pixel differences) of the block. Therefore, RD error scoring isn't easily scalable between coefficient and pixel domain. Thus, unfortunately, we need to compute the RD distortion in the pixel domain until we figure out how to scale these appropriately. Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b --- configure | 2 + test/dct32x32_test.cc | 189 ++++++++++++ test/test.mk | 3 + vp9/common/vp9_blockd.h | 21 +- vp9/common/vp9_default_coef_probs.h | 2 + vp9/common/vp9_entropy.c | 143 ++++++++- vp9/common/vp9_entropy.h | 15 +- vp9/common/vp9_idctllm.c | 462 ++++++++++++++++++++++++++++ vp9/common/vp9_invtrans.c | 13 + vp9/common/vp9_invtrans.h | 5 + vp9/common/vp9_loopfilter.c | 23 +- vp9/common/vp9_onyxc_int.h | 22 +- vp9/common/vp9_recon.c | 47 +++ vp9/common/vp9_rtcd_defs.sh | 6 + vp9/common/vp9_seg_common.c | 2 +- vp9/decoder/vp9_decodemv.c | 22 +- vp9/decoder/vp9_decodframe.c | 49 ++- vp9/decoder/vp9_dequantize.c | 27 ++ vp9/decoder/vp9_detokenize.c | 59 +++- vp9/decoder/vp9_detokenize.h | 6 + vp9/encoder/vp9_bitstream.c | 125 ++++++-- vp9/encoder/vp9_block.h | 24 +- vp9/encoder/vp9_dct.c | 458 +++++++++++++++++++++++++++ vp9/encoder/vp9_encodeframe.c | 127 +++++++- vp9/encoder/vp9_encodemb.c | 62 ++++ vp9/encoder/vp9_encodemb.h | 13 + vp9/encoder/vp9_onyx_if.c | 5 +- vp9/encoder/vp9_onyx_int.h | 25 +- vp9/encoder/vp9_quantize.c | 203 ++++++++++-- vp9/encoder/vp9_quantize.h | 5 + vp9/encoder/vp9_ratectrl.c | 6 + vp9/encoder/vp9_rdopt.c | 339 ++++++++++++++++---- vp9/encoder/vp9_tokenize.c | 145 ++++++++- vp9/encoder/vp9_tokenize.h | 13 + 34 files changed, 2512 insertions(+), 156 deletions(-) create mode 100644 test/dct32x32_test.cc diff --git a/configure b/configure index d1d25b3d5..c93ffd75f 100755 --- a/configure +++ b/configure @@ -247,6 +247,8 @@ EXPERIMENT_LIST=" implicit_segmentation newbintramodes comp_interintra_pred + tx32x32 + dwt32x32hybrid " CONFIG_LIST=" external_build diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc new file mode 100644 index 000000000..8f089c683 --- /dev/null +++ b/test/dct32x32_test.cc @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include + +#include "third_party/googletest/src/include/gtest/gtest.h" + +extern "C" { +#include "vp9/common/vp9_entropy.h" +#include "./vp9_rtcd.h" + void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch); + void vp9_short_idct32x32_c(short *input, short *output, int pitch); +} + +#include "test/acm_random.h" +#include "vpx/vpx_integer.h" + +using libvpx_test::ACMRandom; + +namespace { + +#if !CONFIG_DWT32X32HYBRID +static const double kPi = 3.141592653589793238462643383279502884; +static void reference2_32x32_idct_2d(double *input, double *output) { + double x; + for (int l = 0; l < 32; ++l) { + for (int k = 0; k < 32; ++k) { + double s = 0; + for (int i = 0; i < 32; ++i) { + for (int j = 0; j < 32; ++j) { + x = cos(kPi * j * (l + 0.5) / 32.0) * + cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024; + if (i != 0) + x *= sqrt(2.0); + if (j != 0) + x *= sqrt(2.0); + s += x; + } + } + output[k * 32 + l] = s / 4; + } + } +} + +static void reference_32x32_dct_1d(double in[32], double out[32], int stride) { + const double kInvSqrt2 = 0.707106781186547524400844362104; + for (int k = 0; k < 32; k++) { + out[k] = 0.0; + for (int n = 0; n < 32; n++) + out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0); + if (k == 0) + out[k] = out[k] * kInvSqrt2; + } +} + +static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) { + // First transform columns + for (int i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (int j = 0; j < 32; ++j) + temp_in[j] = input[j*32 + i]; + reference_32x32_dct_1d(temp_in, temp_out, 1); + for (int j = 0; j < 32; ++j) + output[j * 32 + i] = temp_out[j]; + } + // Then transform rows + for (int i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (int j = 0; j < 32; ++j) + temp_in[j] = output[j + i*32]; + reference_32x32_dct_1d(temp_in, temp_out, 1); + // Scale by some magic number + for (int j = 0; j < 32; ++j) + output[j + i * 32] = temp_out[j] / 4; + } +} + + +TEST(VP9Idct32x32Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + for (int i = 0; i < count_test_block; ++i) { + int16_t in[1024], coeff[1024]; + int16_t out_c[1024]; + double out_r[1024]; + + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < 1024; ++j) + in[j] = rnd.Rand8() - rnd.Rand8(); + + reference_32x32_dct_2d(in, out_r); + for (int j = 0; j < 1024; j++) + coeff[j] = round(out_r[j]); + vp9_short_idct32x32_c(coeff, out_c, 64); + for (int j = 0; j < 1024; ++j) { + const int diff = out_c[j] - in[j]; + const int error = diff * diff; + EXPECT_GE(1, error) + << "Error: 32x32 IDCT has error " << error + << " at index " << j; + } + + vp9_short_fdct32x32_c(in, out_c, 64); + for (int j = 0; j < 1024; ++j) { + const double diff = coeff[j] - out_c[j]; + const double error = diff * diff; + EXPECT_GE(1.0, error) + << "Error: 32x32 FDCT has error " << error + << " at index " << j; + } + } +} +#else // CONFIG_DWT32X32HYBRID + // TODO(rbultje/debargha): add DWT-specific tests +#endif // CONFIG_DWT32X32HYBRID +TEST(VP9Fdct32x32Test, AccuracyCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + unsigned int max_error = 0; + int64_t total_error = 0; + const int count_test_block = 1000; + for (int i = 0; i < count_test_block; ++i) { + int16_t test_input_block[1024]; + int16_t test_temp_block[1024]; + int16_t test_output_block[1024]; + + // Initialize a test block with input range [-255, 255]. 
+ for (int j = 0; j < 1024; ++j) + test_input_block[j] = rnd.Rand8() - rnd.Rand8(); + + const int pitch = 64; + vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch); + vp9_short_idct32x32_c(test_temp_block, test_output_block, pitch); + + for (int j = 0; j < 1024; ++j) { + const unsigned diff = test_input_block[j] - test_output_block[j]; + const unsigned error = diff * diff; + if (max_error < error) + max_error = error; + total_error += error; + } + } + + EXPECT_GE(1u, max_error) + << "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1"; + + EXPECT_GE(count_test_block/10, total_error) + << "Error: 32x32 FDCT/IDCT has average roundtrip error > 1/10 per block"; +} + +TEST(VP9Fdct32x32Test, CoeffSizeCheck) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + const int count_test_block = 1000; + for (int i = 0; i < count_test_block; ++i) { + int16_t input_block[1024], input_extreme_block[1024]; + int16_t output_block[1024], output_extreme_block[1024]; + + // Initialize a test block with input range [-255, 255]. + for (int j = 0; j < 1024; ++j) { + input_block[j] = rnd.Rand8() - rnd.Rand8(); + input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255; + } + if (i == 0) + for (int j = 0; j < 1024; ++j) + input_extreme_block[j] = 255; + + const int pitch = 64; + vp9_short_fdct32x32_c(input_block, output_block, pitch); + vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch); + + // The minimum quant value is 4. + for (int j = 0; j < 1024; ++j) { + EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j])) + << "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE"; + EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j])) + << "Error: 32x32 FDCT extreme has coefficient larger than " + "4*DCT_MAX_VALUE"; + } + } +} +} // namespace diff --git a/test/test.mk b/test/test.mk index 4fb464e64..919cf0438 100644 --- a/test/test.mk +++ b/test/test.mk @@ -64,6 +64,9 @@ endif LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc +ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_TX32X32),yesyes) +LIBVPX_TEST_SRCS-yes += dct32x32_test.cc +endif LIBVPX_TEST_SRCS-yes += idct8x8_test.cc LIBVPX_TEST_SRCS-yes += variance_test.cc endif # VP9 diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 27ef22fff..11efd4475 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -129,7 +129,13 @@ typedef enum { TX_4X4, // 4x4 dct transform TX_8X8, // 8x8 dct transform TX_16X16, // 16x16 dct transform - TX_SIZE_MAX // Number of different transforms available + TX_SIZE_MAX_MB, // Number of transforms available to MBs +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform + TX_SIZE_MAX_SB, // Number of transforms available to SBs +#else + TX_SIZE_MAX_SB = TX_SIZE_MAX_MB, +#endif } TX_SIZE; typedef enum { @@ -302,6 +308,15 @@ typedef struct blockd { union b_mode_info bmi; } BLOCKD; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +typedef struct superblockd { + /* 32x32 Y and 16x16 U/V. No 2nd order transform yet. 
*/ + DECLARE_ALIGNED(16, short, diff[32*32+16*16*2]); + DECLARE_ALIGNED(16, short, qcoeff[32*32+16*16*2]); + DECLARE_ALIGNED(16, short, dqcoeff[32*32+16*16*2]); +} SUPERBLOCKD; +#endif + typedef struct macroblockd { DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */ DECLARE_ALIGNED(16, unsigned char, predictor[384]); @@ -309,6 +324,10 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, short, dqcoeff[400]); DECLARE_ALIGNED(16, unsigned short, eobs[25]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + SUPERBLOCKD sb_coeff_data; +#endif + /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ BLOCKD block[25]; int fullpixel_mask; diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 52fb02f36..31103adb7 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -1375,3 +1375,5 @@ static const vp9_prob } } }; + +#define default_coef_probs_32x32 default_coef_probs_16x16 diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 4832b4e9c..321fa8c57 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -132,6 +132,109 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255, }; +DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, 37, 6, 7, 38, 69, 100, + 131, 162, 193, 224, 256, 225, 194, 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, + 41, 10, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352, 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, 12, 13, 44, 75, 106, 137, + 168, 199, 230, 261, 292, 323, 354, 385, 416, 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, 76, 45, 14, 15, 46, 77, 108, 139, 170, 201, 232, + 263, 294, 325, 356, 387, 418, 449, 480, 512, 481, 450, 419, 388, 357, 326, 295, 264, 233, 202, 171, 140, 109, 78, 47, 16, 17, 48, 79, 110, 141, 172, 203, + 234, 265, 296, 327, 358, 389, 420, 451, 482, 513, 544, 576, 545, 514, 483, 452, 421, 390, 359, 328, 297, 266, 235, 204, 173, 142, 111, 80, 49, 18, 19, 50, + 81, 112, 143, 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546, 577, 608, 640, 609, 578, 547, 516, 485, 454, 423, 392, 361, 330, 299, 268, 237, + 206, 175, 144, 113, 82, 51, 20, 21, 52, 83, 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486, 517, 548, 579, 610, 641, 672, 704, 673, 642, + 611, 580, 549, 518, 487, 456, 425, 394, 363, 332, 301, 270, 239, 208, 177, 146, 115, 84, 53, 22, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302, 333, 364, + 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705, 736, 768, 737, 706, 675, 644, 613, 582, 551, 520, 489, 458, 427, 396, 365, 334, 303, 272, 241, 210, 179, + 148, 117, 86, 55, 24, 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397, 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800, 832, + 801, 770, 739, 708, 677, 646, 615, 584, 553, 522, 491, 460, 429, 398, 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, 26, 27, 58, 89, 120, 151, 182, + 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771, 802, 833, 864, 896, 865, 834, 803, 772, 741, 710, 679, 648, 617, + 586, 555, 524, 493, 462, 431, 400, 369, 338, 307, 276, 245, 214, 183, 152, 121, 90, 59, 28, 29, 60, 91, 122, 153, 184, 215, 246, 277, 308, 339, 370, 401, + 432, 463, 494, 525, 556, 587, 618, 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 960, 929, 898, 867, 836, 805, 774, 743, 712, 681, 650, 619, 588, 557, 526, + 495, 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, 61, 30, 31, 62, 93, 124, 155, 186, 217, 248, 279, 
310, 341, 372, 403, 434, 465, 496, + 527, 558, 589, 620, 651, 682, 713, 744, 775, 806, 837, 868, 899, 930, 961, 992, 993, 962, 931, 900, 869, 838, 807, 776, 745, 714, 683, 652, 621, 590, 559, 528, + 497, 466, 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, 94, 63, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467, 498, 529, 560, 591, + 622, 653, 684, 715, 746, 777, 808, 839, 870, 901, 932, 963, 994, 995, 964, 933, 902, 871, 840, 809, 778, 747, 716, 685, 654, 623, 592, 561, 530, 499, 468, 437, + 406, 375, 344, 313, 282, 251, 220, 189, 158, 127, 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717, 748, 779, 810, + 841, 872, 903, 934, 965, 996, 997, 966, 935, 904, 873, 842, 811, 780, 749, 718, 687, 656, 625, 594, 563, 532, 501, 470, 439, 408, 377, 346, 315, 284, 253, 222, + 191, 223, 254, 285, 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626, 657, 688, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 999, 968, 937, 906, 875, + 844, 813, 782, 751, 720, 689, 658, 627, 596, 565, 534, 503, 472, 441, 410, 379, 348, 317, 286, 255, 287, 318, 349, 380, 411, 442, 473, 504, 535, 566, 597, 628, + 659, 690, 721, 752, 783, 814, 845, 876, 907, 938, 969, 1000, 1001, 970, 939, 908, 877, 846, 815, 784, 753, 722, 691, 660, 629, 598, 567, 536, 505, 474, 443, 412, + 381, 350, 319, 351, 382, 413, 444, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785, 816, 847, 878, 909, 940, 971, 1002, 1003, 972, 941, 910, 879, 848, 817, + 786, 755, 724, 693, 662, 631, 600, 569, 538, 507, 476, 445, 414, 383, 415, 446, 477, 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942, + 973, 1004, 1005, 974, 943, 912, 881, 850, 819, 788, 757, 726, 695, 664, 633, 602, 571, 540, 509, 478, 447, 479, 510, 541, 572, 603, 634, 665, 696, 727, 758, 789, + 820, 851, 882, 913, 944, 975, 1006, 1007, 976, 945, 914, 883, 852, 821, 790, 759, 728, 697, 666, 635, 604, 573, 542, 511, 543, 574, 605, 636, 667, 698, 729, 760, + 791, 822, 853, 884, 915, 946, 977, 1008, 1009, 978, 947, 916, 885, 854, 823, 792, 761, 730, 699, 668, 637, 606, 575, 607, 638, 669, 700, 731, 762, 793, 824, 855, + 886, 917, 948, 979, 1010, 1011, 980, 949, 918, 887, 856, 825, 794, 763, 732, 701, 670, 639, 671, 702, 733, 764, 795, 826, 857, 888, 919, 950, 981, 1012, 1013, 982, + 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, + 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, +}; /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -160,10 +263,11 @@ static const Prob Pcat2[] = { 165, 145}; static const Prob Pcat3[] = { 173, 148, 140}; static const Prob Pcat4[] = { 176, 155, 140, 135}; static const Prob Pcat5[] = { 180, 157, 141, 134, 130}; -static const Prob Pcat6[] = -{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129}; +static const Prob Pcat6[] = { + 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 +}; -static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[26]; +static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28]; static void init_bit_tree(vp9_tree_index *p, int n) { int i = 0; @@ -182,7 +286,7 @@ static void init_bit_trees() { init_bit_tree(cat3, 3); init_bit_tree(cat4, 4); init_bit_tree(cat5, 5); - init_bit_tree(cat6, 13); + init_bit_tree(cat6, 14); } 
vp9_extra_bit_struct vp9_extra_bits[12] = { @@ -196,7 +300,7 @@ vp9_extra_bit_struct vp9_extra_bits[12] = { { cat3, Pcat3, 3, 11}, { cat4, Pcat4, 4, 19}, { cat5, Pcat5, 5, 35}, - { cat6, Pcat6, 13, 67}, + { cat6, Pcat6, 14, 67}, { 0, 0, 0, 0} }; @@ -218,6 +322,11 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, default_hybrid_coef_probs_16x16, sizeof(pc->fc.hybrid_coef_probs_16x16)); + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32, + sizeof(pc->fc.coef_probs_32x32)); +#endif } void vp9_coef_tree_initialize() { @@ -444,4 +553,28 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob; } } + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + for (i = 0; i < BLOCK_TYPES_32X32; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp9_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, + coef_probs, branch_ct, cm->fc.coef_counts_32x32[i][j][k], 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)cm->fc.pre_coef_probs_32x32[i][j][k][t] * + (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + if (prob <= 0) cm->fc.coef_probs_32x32[i][j][k][t] = 1; + else if (prob > 255) cm->fc.coef_probs_32x32[i][j][k][t] = 255; + else cm->fc.coef_probs_32x32[i][j][k][t] = prob; + } + } +#endif } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 3c74de7be..96d964448 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -55,7 +55,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ #define PROB_UPDATE_BASELINE_COST 7 #define MAX_PROB 255 -#define DCT_MAX_VALUE 8192 +#define DCT_MAX_VALUE 16384 /* Coefficients are predicted via a 3-dimensional probability table. */ @@ -66,6 +66,10 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ #define BLOCK_TYPES_16X16 4 +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#define BLOCK_TYPES_32X32 4 +#endif + /* Middle dimension is a coarsening of the coefficient's position within the 4x4 DCT. */ @@ -73,6 +77,9 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ extern DECLARE_ALIGNED(16, const int, vp9_coef_bands[16]); extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]); +#endif /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. 
For the first @@ -106,9 +113,13 @@ extern DECLARE_ALIGNED(16, const int, vp9_col_scan[16]); extern DECLARE_ALIGNED(16, const int, vp9_row_scan[16]); extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]); +extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); +#endif + void vp9_coef_tree_initialize(void); -extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); void vp9_adapt_coef_probs(struct VP9Common *); #endif diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 9622dfdee..cc685b99e 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -1774,3 +1774,465 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { #undef RIGHT_SHIFT #undef RIGHT_ROUNDING #endif + +#if CONFIG_TX32X32 +#if !CONFIG_DWT32X32HYBRID +#define DownshiftMultiplyBy2(x) x * 2 +#define DownshiftMultiply(x) x +static void idct16(double *input, double *output, int stride) { + static const double C1 = 0.995184726672197; + static const double C2 = 0.98078528040323; + static const double C3 = 0.956940335732209; + static const double C4 = 0.923879532511287; + static const double C5 = 0.881921264348355; + static const double C6 = 0.831469612302545; + static const double C7 = 0.773010453362737; + static const double C8 = 0.707106781186548; + static const double C9 = 0.634393284163646; + static const double C10 = 0.555570233019602; + static const double C11 = 0.471396736825998; + static const double C12 = 0.38268343236509; + static const double C13 = 0.290284677254462; + static const double C14 = 0.195090322016128; + static const double C15 = 0.098017140329561; + + double step[16]; + double intermediate[16]; + double temp1, temp2; + + // step 1 and 2 + step[ 0] = input[stride*0] + input[stride*8]; + step[ 1] = input[stride*0] - input[stride*8]; + + temp1 = input[stride*4]*C12; + temp2 = input[stride*12]*C4; + + temp1 -= temp2; + temp1 = DownshiftMultiply(temp1); + temp1 *= C8; + + step[ 2] = DownshiftMultiplyBy2(temp1); + + temp1 = input[stride*4]*C4; + temp2 = input[stride*12]*C12; + temp1 += temp2; + temp1 = DownshiftMultiply(temp1); + temp1 *= C8; + step[ 3] = DownshiftMultiplyBy2(temp1); + + temp1 = input[stride*2]*C8; + temp1 = DownshiftMultiplyBy2(temp1); + temp2 = input[stride*6] + input[stride*10]; + + step[ 4] = temp1 + temp2; + step[ 5] = temp1 - temp2; + + temp1 = input[stride*14]*C8; + temp1 = DownshiftMultiplyBy2(temp1); + temp2 = input[stride*6] - input[stride*10]; + + step[ 6] = temp2 - temp1; + step[ 7] = temp2 + temp1; + + // for odd input + temp1 = input[stride*3]*C12; + temp2 = input[stride*13]*C4; + temp1 += temp2; + temp1 = DownshiftMultiply(temp1); + temp1 *= C8; + intermediate[ 8] = DownshiftMultiplyBy2(temp1); + + temp1 = input[stride*3]*C4; + temp2 = input[stride*13]*C12; + temp2 -= temp1; + temp2 = DownshiftMultiply(temp2); + temp2 *= C8; + intermediate[ 9] = DownshiftMultiplyBy2(temp2); + + intermediate[10] = DownshiftMultiplyBy2(input[stride*9]*C8); + intermediate[11] = input[stride*15] - input[stride*1]; + intermediate[12] = input[stride*15] + input[stride*1]; + intermediate[13] = DownshiftMultiplyBy2((input[stride*7]*C8)); + + temp1 = input[stride*11]*C12; + temp2 = input[stride*5]*C4; + temp2 -= temp1; + temp2 = DownshiftMultiply(temp2); + temp2 *= C8; + intermediate[14] = DownshiftMultiplyBy2(temp2); + + temp1 = input[stride*11]*C4; + temp2 = input[stride*5]*C12; + temp1 += 
temp2; + temp1 = DownshiftMultiply(temp1); + temp1 *= C8; + intermediate[15] = DownshiftMultiplyBy2(temp1); + + step[ 8] = intermediate[ 8] + intermediate[14]; + step[ 9] = intermediate[ 9] + intermediate[15]; + step[10] = intermediate[10] + intermediate[11]; + step[11] = intermediate[10] - intermediate[11]; + step[12] = intermediate[12] + intermediate[13]; + step[13] = intermediate[12] - intermediate[13]; + step[14] = intermediate[ 8] - intermediate[14]; + step[15] = intermediate[ 9] - intermediate[15]; + + // step 3 + output[stride*0] = step[ 0] + step[ 3]; + output[stride*1] = step[ 1] + step[ 2]; + output[stride*2] = step[ 1] - step[ 2]; + output[stride*3] = step[ 0] - step[ 3]; + + temp1 = step[ 4]*C14; + temp2 = step[ 7]*C2; + temp1 -= temp2; + output[stride*4] = DownshiftMultiply(temp1); + + temp1 = step[ 4]*C2; + temp2 = step[ 7]*C14; + temp1 += temp2; + output[stride*7] = DownshiftMultiply(temp1); + + temp1 = step[ 5]*C10; + temp2 = step[ 6]*C6; + temp1 -= temp2; + output[stride*5] = DownshiftMultiply(temp1); + + temp1 = step[ 5]*C6; + temp2 = step[ 6]*C10; + temp1 += temp2; + output[stride*6] = DownshiftMultiply(temp1); + + output[stride*8] = step[ 8] + step[11]; + output[stride*9] = step[ 9] + step[10]; + output[stride*10] = step[ 9] - step[10]; + output[stride*11] = step[ 8] - step[11]; + output[stride*12] = step[12] + step[15]; + output[stride*13] = step[13] + step[14]; + output[stride*14] = step[13] - step[14]; + output[stride*15] = step[12] - step[15]; + + // output 4 + step[ 0] = output[stride*0] + output[stride*7]; + step[ 1] = output[stride*1] + output[stride*6]; + step[ 2] = output[stride*2] + output[stride*5]; + step[ 3] = output[stride*3] + output[stride*4]; + step[ 4] = output[stride*3] - output[stride*4]; + step[ 5] = output[stride*2] - output[stride*5]; + step[ 6] = output[stride*1] - output[stride*6]; + step[ 7] = output[stride*0] - output[stride*7]; + + temp1 = output[stride*8]*C7; + temp2 = output[stride*15]*C9; + temp1 -= temp2; + step[ 8] = DownshiftMultiply(temp1); + + temp1 = output[stride*9]*C11; + temp2 = output[stride*14]*C5; + temp1 += temp2; + step[ 9] = DownshiftMultiply(temp1); + + temp1 = output[stride*10]*C3; + temp2 = output[stride*13]*C13; + temp1 -= temp2; + step[10] = DownshiftMultiply(temp1); + + temp1 = output[stride*11]*C15; + temp2 = output[stride*12]*C1; + temp1 += temp2; + step[11] = DownshiftMultiply(temp1); + + temp1 = output[stride*11]*C1; + temp2 = output[stride*12]*C15; + temp2 -= temp1; + step[12] = DownshiftMultiply(temp2); + + temp1 = output[stride*10]*C13; + temp2 = output[stride*13]*C3; + temp1 += temp2; + step[13] = DownshiftMultiply(temp1); + + temp1 = output[stride*9]*C5; + temp2 = output[stride*14]*C11; + temp2 -= temp1; + step[14] = DownshiftMultiply(temp2); + + temp1 = output[stride*8]*C9; + temp2 = output[stride*15]*C7; + temp1 += temp2; + step[15] = DownshiftMultiply(temp1); + + // step 5 + output[stride*0] = step[0] + step[15]; + output[stride*1] = step[1] + step[14]; + output[stride*2] = step[2] + step[13]; + output[stride*3] = step[3] + step[12]; + output[stride*4] = step[4] + step[11]; + output[stride*5] = step[5] + step[10]; + output[stride*6] = step[6] + step[ 9]; + output[stride*7] = step[7] + step[ 8]; + + output[stride*15] = step[0] - step[15]; + output[stride*14] = step[1] - step[14]; + output[stride*13] = step[2] - step[13]; + output[stride*12] = step[3] - step[12]; + output[stride*11] = step[4] - step[11]; + output[stride*10] = step[5] - step[10]; + output[stride*9] = step[6] - step[ 9]; + output[stride*8] = 
step[7] - step[ 8]; +} +static void butterfly_32_idct_1d(double *input, double *output, int stride) { + static const double C1 = 0.998795456205; // cos(pi * 1 / 64) + static const double C3 = 0.989176509965; // cos(pi * 3 / 64) + static const double C5 = 0.970031253195; // cos(pi * 5 / 64) + static const double C7 = 0.941544065183; // cos(pi * 7 / 64) + static const double C9 = 0.903989293123; // cos(pi * 9 / 64) + static const double C11 = 0.857728610000; // cos(pi * 11 / 64) + static const double C13 = 0.803207531481; // cos(pi * 13 / 64) + static const double C15 = 0.740951125355; // cos(pi * 15 / 64) + static const double C16 = 0.707106781187; // cos(pi * 16 / 64) + static const double C17 = 0.671558954847; // cos(pi * 17 / 64) + static const double C19 = 0.595699304492; // cos(pi * 19 / 64) + static const double C21 = 0.514102744193; // cos(pi * 21 / 64) + static const double C23 = 0.427555093430; // cos(pi * 23 / 64) + static const double C25 = 0.336889853392; // cos(pi * 25 / 64) + static const double C27 = 0.242980179903; // cos(pi * 27 / 64) + static const double C29 = 0.146730474455; // cos(pi * 29 / 64) + static const double C31 = 0.049067674327; // cos(pi * 31 / 64) + + double step1[32]; + double step2[32]; + + step1[ 0] = input[stride*0]; + step1[ 1] = input[stride*2]; + step1[ 2] = input[stride*4]; + step1[ 3] = input[stride*6]; + step1[ 4] = input[stride*8]; + step1[ 5] = input[stride*10]; + step1[ 6] = input[stride*12]; + step1[ 7] = input[stride*14]; + step1[ 8] = input[stride*16]; + step1[ 9] = input[stride*18]; + step1[10] = input[stride*20]; + step1[11] = input[stride*22]; + step1[12] = input[stride*24]; + step1[13] = input[stride*26]; + step1[14] = input[stride*28]; + step1[15] = input[stride*30]; + + step1[16] = DownshiftMultiplyBy2(input[stride*1]*C16); + step1[17] = (input[stride*3] + input[stride*1]); + step1[18] = (input[stride*5] + input[stride*3]); + step1[19] = (input[stride*7] + input[stride*5]); + step1[20] = (input[stride*9] + input[stride*7]); + step1[21] = (input[stride*11] + input[stride*9]); + step1[22] = (input[stride*13] + input[stride*11]); + step1[23] = (input[stride*15] + input[stride*13]); + step1[24] = (input[stride*17] + input[stride*15]); + step1[25] = (input[stride*19] + input[stride*17]); + step1[26] = (input[stride*21] + input[stride*19]); + step1[27] = (input[stride*23] + input[stride*21]); + step1[28] = (input[stride*25] + input[stride*23]); + step1[29] = (input[stride*27] + input[stride*25]); + step1[30] = (input[stride*29] + input[stride*27]); + step1[31] = (input[stride*31] + input[stride*29]); + + idct16(step1, step2, 1); + idct16(step1 + 16, step2 + 16, 1); + + step2[16] = DownshiftMultiply(step2[16] / (2*C1)); + step2[17] = DownshiftMultiply(step2[17] / (2*C3)); + step2[18] = DownshiftMultiply(step2[18] / (2*C5)); + step2[19] = DownshiftMultiply(step2[19] / (2*C7)); + step2[20] = DownshiftMultiply(step2[20] / (2*C9)); + step2[21] = DownshiftMultiply(step2[21] / (2*C11)); + step2[22] = DownshiftMultiply(step2[22] / (2*C13)); + step2[23] = DownshiftMultiply(step2[23] / (2*C15)); + step2[24] = DownshiftMultiply(step2[24] / (2*C17)); + step2[25] = DownshiftMultiply(step2[25] / (2*C19)); + step2[26] = DownshiftMultiply(step2[26] / (2*C21)); + step2[27] = DownshiftMultiply(step2[27] / (2*C23)); + step2[28] = DownshiftMultiply(step2[28] / (2*C25)); + step2[29] = DownshiftMultiply(step2[29] / (2*C27)); + step2[30] = DownshiftMultiply(step2[30] / (2*C29)); + step2[31] = DownshiftMultiply(step2[31] / (2*C31)); + + output[stride* 0] = step2[ 0] 
+ step2[16]; + output[stride* 1] = step2[ 1] + step2[17]; + output[stride* 2] = step2[ 2] + step2[18]; + output[stride* 3] = step2[ 3] + step2[19]; + output[stride* 4] = step2[ 4] + step2[20]; + output[stride* 5] = step2[ 5] + step2[21]; + output[stride* 6] = step2[ 6] + step2[22]; + output[stride* 7] = step2[ 7] + step2[23]; + output[stride* 8] = step2[ 8] + step2[24]; + output[stride* 9] = step2[ 9] + step2[25]; + output[stride*10] = step2[10] + step2[26]; + output[stride*11] = step2[11] + step2[27]; + output[stride*12] = step2[12] + step2[28]; + output[stride*13] = step2[13] + step2[29]; + output[stride*14] = step2[14] + step2[30]; + output[stride*15] = step2[15] + step2[31]; + output[stride*16] = step2[15] - step2[(31 - 0)]; + output[stride*17] = step2[14] - step2[(31 - 1)]; + output[stride*18] = step2[13] - step2[(31 - 2)]; + output[stride*19] = step2[12] - step2[(31 - 3)]; + output[stride*20] = step2[11] - step2[(31 - 4)]; + output[stride*21] = step2[10] - step2[(31 - 5)]; + output[stride*22] = step2[ 9] - step2[(31 - 6)]; + output[stride*23] = step2[ 8] - step2[(31 - 7)]; + output[stride*24] = step2[ 7] - step2[(31 - 8)]; + output[stride*25] = step2[ 6] - step2[(31 - 9)]; + output[stride*26] = step2[ 5] - step2[(31 - 10)]; + output[stride*27] = step2[ 4] - step2[(31 - 11)]; + output[stride*28] = step2[ 3] - step2[(31 - 12)]; + output[stride*29] = step2[ 2] - step2[(31 - 13)]; + output[stride*30] = step2[ 1] - step2[(31 - 14)]; + output[stride*31] = step2[ 0] - step2[(31 - 15)]; +} + +void vp9_short_idct32x32_c(short *input, short *output, int pitch) { + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + double out[32*32], out2[32*32]; + const int short_pitch = pitch >> 1; + int i, j; + // First transform rows + for (i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (j = 0; j < 32; ++j) + temp_in[j] = input[j + i*short_pitch]; + butterfly_32_idct_1d(temp_in, temp_out, 1); + for (j = 0; j < 32; ++j) + out[j + i*32] = temp_out[j]; + } + // Then transform columns + for (i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (j = 0; j < 32; ++j) + temp_in[j] = out[j*32 + i]; + butterfly_32_idct_1d(temp_in, temp_out, 1); + for (j = 0; j < 32; ++j) + out2[j*32 + i] = temp_out[j]; + } + for (i = 0; i < 32*32; ++i) + output[i] = round(out2[i]/128); + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} +#else // CONFIG_DWT32X32HYBRID + +#define MAX_BLOCK_LENGTH 64 +#define ENH_PRECISION_BITS 1 +#define ENH_PRECISION_RND ((1 << ENH_PRECISION_BITS) / 2) + +// Note: block length must be even for this implementation +static void synthesis_53_row(int length, short *lowpass, short *highpass, + short *x) { + short r, * a, * b; + int n; + + n = length >> 1; + b = highpass; + a = lowpass; + r = *highpass; + while (n--) { + *a++ -= (r + (*b) + 1) >> 1; + r = *b++; + } + + n = length >> 1; + b = highpass; + a = lowpass; + while (--n) { + *x++ = ((r = *a++) + 1) >> 1; + *x++ = *b++ + ((r + (*a) + 2) >> 2); + } + *x++ = ((r = *a) + 1)>>1; + *x++ = *b + ((r+1)>>1); +} + +static void synthesis_53_col(int length, short *lowpass, short *highpass, + short *x) { + short r, * a, * b; + int n; + + n = length >> 1; + b = highpass; + a = lowpass; + r = *highpass; + while (n--) { + *a++ -= (r + (*b) + 1) >> 1; + r = *b++; + } + + n = length >> 1; + b = highpass; + a = lowpass; + while (--n) { + *x++ = r = *a++; + *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1); + } + *x++ = r = *a; + *x++ = ((*b) << 1) + r; +} + +// NOTE: Using a 5/3 integer wavelet for now. 
Explore using a wavelet +// with a better response later +void dyadic_synthesize(int levels, int width, int height, short *c, int pitch_c, + short *x, int pitch_x) { + int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; + short buffer[2 * MAX_BLOCK_LENGTH]; + + th[0] = hh; + tw[0] = hw; + for (i = 1; i <= levels; i++) { + th[i] = (th[i - 1] + 1) >> 1; + tw[i] = (tw[i - 1] + 1) >> 1; + } + for (lv = levels - 1; lv >= 0; lv--) { + nh = th[lv]; + nw = tw[lv]; + hh = th[lv + 1]; + hw = tw[lv + 1]; + if ((nh < 2) || (nw < 2)) continue; + for (j = 0; j < nw; j++) { + for (i = 0; i < nh; i++) + buffer[i] = c[i * pitch_c + j]; + synthesis_53_col(nh, buffer, buffer + hh, buffer + nh); + for (i = 0; i < nh; i++) + c[i * pitch_c + j] = buffer[i + nh]; + } + for (i = 0; i < nh; i++) { + memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); + synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]); + } + } + for (i = 0; i < height; i++) + for (j = 0; j < width; j++) + x[i * pitch_x + j] = (c[i * pitch_c + j] + ENH_PRECISION_RND) >> + ENH_PRECISION_BITS; +} + +void vp9_short_idct32x32_c(short *input, short *output, int pitch) { + // assume out is a 32x32 buffer + short buffer[16 * 16]; + short buffer2[32 * 32]; + const int short_pitch = pitch >> 1; + int i; + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the idct16x16 function + vp9_short_idct16x16_c(input, buffer, pitch); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(short) * 16); + vpx_memcpy(buffer2 + i * 32 + 16, input + i * short_pitch + 16, + sizeof(short) * 16); + } + for (; i < 32; ++i) { + vpx_memcpy(buffer2 + i * 32, input + i * short_pitch, + sizeof(short) * 32); + } + dyadic_synthesize(1, 32, 32, buffer2, 32, output, 32); +} +#endif // CONFIG_DWT32X32HYBRID +#endif // CONFIG_TX32X32 diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index c78f1ad3c..3abf32894 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -143,3 +143,16 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { vp9_inverse_transform_mby_16x16(xd); vp9_inverse_transform_mbuv_8x8(xd); } + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) { + vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64); +} + +void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) { + vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024, + xd_sb->diff + 1024, 32); + vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280, + xd_sb->diff + 1280, 32); +} +#endif diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index b012834f3..94593f8cc 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -38,4 +38,9 @@ extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); +extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); +#endif + #endif // __INC_INVTRANS_H diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 8d4d014ba..1139fb5d1 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -192,6 +192,9 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { /* Point at base of Mb MODE_INFO list */ const MODE_INFO *mode_info_context = cm->mi; +#if CONFIG_SUPERBLOCKS + const int mis = cm->mode_info_stride; +#endif /* Initialize the loop filter for this frame. 
*/ vp9_loop_filter_frame_init(cm, xd, cm->filter_level); @@ -226,14 +229,18 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { if (mb_col > 0 #if CONFIG_SUPERBLOCKS && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && - mode_info_context[0].mbmi.mb_skip_coeff && - mode_info_context[-1].mbmi.mb_skip_coeff) + ((mode_info_context[0].mbmi.mb_skip_coeff && + mode_info_context[-1].mbmi.mb_skip_coeff) +#if CONFIG_TX32X32 + || mode_info_context[-1].mbmi.txfm_size == TX_32X32 +#endif + )) #endif ) vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); - if (!skip_lf && tx_type != TX_16X16) { + if (!skip_lf && tx_type < TX_16X16) { if (tx_type == TX_8X8) vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); @@ -247,14 +254,18 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { if (mb_row > 0 #if CONFIG_SUPERBLOCKS && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && - mode_info_context[0].mbmi.mb_skip_coeff && - mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff) + ((mode_info_context[0].mbmi.mb_skip_coeff && + mode_info_context[-mis].mbmi.mb_skip_coeff) +#if CONFIG_TX32X32 + || mode_info_context[-mis].mbmi.txfm_size == TX_32X32 +#endif + )) #endif ) vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); - if (!skip_lf && tx_type != TX_16X16) { + if (!skip_lf && tx_type < TX_16X16) { if (tx_type == TX_8X8) vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 0b6de7f82..d80498df1 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -58,6 +58,9 @@ typedef struct frame_contexts { vp9_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; vp9_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; vp9_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_prob coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif nmv_context nmvc; nmv_context pre_nmvc; @@ -95,6 +98,11 @@ typedef struct frame_contexts { vp9_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_prob pre_coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif + unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] @@ -110,6 +118,11 @@ typedef struct frame_contexts { unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + unsigned int coef_counts_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] + [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif + nmv_context_counts NMVcount; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; @@ -139,8 +152,11 @@ typedef enum { ONLY_4X4 = 0, ALLOW_8X8 = 1, ALLOW_16X16 = 2, - TX_MODE_SELECT = 3, - NB_TXFM_MODES = 4, +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + ALLOW_32X32 = 3, +#endif + TX_MODE_SELECT = 3 + (CONFIG_TX32X32 && CONFIG_SUPERBLOCKS), + NB_TXFM_MODES = 4 + (CONFIG_TX32X32 && CONFIG_SUPERBLOCKS), } TXFM_MODE; typedef struct VP9Common { @@ -268,7 +284,7 @@ typedef 
struct VP9Common { vp9_prob prob_comppred[COMP_PRED_CONTEXTS]; // FIXME contextualize - vp9_prob prob_tx[TX_SIZE_MAX - 1]; + vp9_prob prob_tx[TX_SIZE_MAX_SB - 1]; vp9_prob mbskip_pred_probs[MBSKIP_CONTEXTS]; diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index 1f8dfce34..e567bac8d 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -168,6 +168,53 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { } } } + +#if CONFIG_TX32X32 +void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { + int x, y, stride = xd->block[0].dst_stride; + short *diff = xd->sb_coeff_data.diff; + + for (y = 0; y < 32; y++) { + for (x = 0; x < 32; x++) { + int a = dst[x] + diff[x]; + if (a < 0) + a = 0; + else if (a > 255) + a = 255; + dst[x] = a; + } + dst += stride; + diff += 32; + } +} + +void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { + int x, y, stride = xd->block[16].dst_stride; + short *udiff = xd->sb_coeff_data.diff + 1024; + short *vdiff = xd->sb_coeff_data.diff + 1280; + + for (y = 0; y < 16; y++) { + for (x = 0; x < 16; x++) { + int u = udst[x] + udiff[x]; + int v = vdst[x] + vdiff[x]; + if (u < 0) + u = 0; + else if (u > 255) + u = 255; + if (v < 0) + v = 0; + else if (v > 255) + v = 255; + udst[x] = u; + vdst[x] = v; + } + udst += stride; + vdst += stride; + udiff += 16; + vdiff += 16; + } +} +#endif #endif void vp9_recon_mby_c(MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 5b7af100b..49a3a8595 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -361,6 +361,9 @@ specialize vp9_short_idct16x16 prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch" specialize vp9_short_idct10_16x16 +prototype void vp9_short_idct32x32 "short *input, short *output, int pitch" +specialize vp9_short_idct32x32 + prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim" specialize vp9_ihtllm @@ -640,6 +643,9 @@ specialize vp9_short_fdct8x4 prototype void vp9_short_walsh4x4 "short *InputData, short *OutputData, int pitch" specialize vp9_short_walsh4x4 +prototype void vp9_short_fdct32x32 "short *InputData, short *OutputData, int pitch" +specialize vp9_short_fdct32x32 + prototype void vp9_short_fdct16x16 "short *InputData, short *OutputData, int pitch" specialize vp9_short_fdct16x16 diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c index 46a6ee454..89c1e458d 100644 --- a/vp9/common/vp9_seg_common.c +++ b/vp9/common/vp9_seg_common.c @@ -14,7 +14,7 @@ static const int segfeaturedata_signed[SEG_LVL_MAX] = { 1, 1, 0, 0, 0, 0 }; static const int seg_feature_data_max[SEG_LVL_MAX] = - { MAXQ, 63, 0xf, MB_MODE_COUNT - 1, 255, TX_SIZE_MAX - 1}; + { MAXQ, 63, 0xf, MB_MODE_COUNT - 1, 255, TX_SIZE_MAX_SB - 1}; // These functions provide access to new segment level features. 
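// (The TX_SIZE_MAX_SB - 1 bound above assumes TX_32X32 is appended to the
// transform-size enum; a minimal sketch of that assumed layout, not part of
// this change:
//   typedef enum {
//     TX_4X4, TX_8X8, TX_16X16, TX_32X32,
//     TX_SIZE_MAX_MB = TX_32X32,  /* number of sizes usable in a macroblock */
//     TX_SIZE_MAX_SB              /* number of sizes usable in a superblock */
//   } TX_SIZE;
// so prob_tx[], the per-size counters and this segment feature each gain one
// slot.)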
// Eventually these function may be "optimized out" but for the moment, diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 7e53884f7..b9f411dd2 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -209,8 +209,17 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.mode <= I8X8_PRED) { // FIXME(rbultje) code ternary symbol once all experiments are merged m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]); - if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) + if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) { m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.encoded_as_sb) + m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]); +#endif + } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.encoded_as_sb) { + m->mbmi.txfm_size = TX_32X32; +#endif } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { m->mbmi.txfm_size = TX_16X16; } else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) { @@ -1219,8 +1228,17 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // FIXME(rbultje) code ternary symbol once all experiments are merged mbmi->txfm_size = vp9_read(bc, cm->prob_tx[0]); if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV) + mbmi->mode != SPLITMV) { mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (mbmi->encoded_as_sb && mbmi->txfm_size != TX_8X8) + mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]); +#endif + } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (mbmi->encoded_as_sb && cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; +#endif } else if (cm->txfm_mode >= ALLOW_16X16 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) || (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 76349ad86..7f851a18a 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -693,6 +693,7 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; MODE_INFO *orig_mi = xd->mode_info_context; + const int mis = pc->mode_info_stride; assert(xd->mode_info_context->mbmi.encoded_as_sb); @@ -733,6 +734,30 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ +#if CONFIG_TX32X32 + if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { + eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; + if (mb_row + 1 < pc->mb_rows) { + xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; + } + } else { + vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, + xd->dst.y_buffer, xd->dst.y_buffer, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[0]); + vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 16); + } + } else { +#endif for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; @@ -742,7 +767,7 @@ static void 
decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; xd->left_context = pc->left_context + y_idx; - xd->mode_info_context = orig_mi + x_idx + y_idx * pc->mode_info_stride; + xd->mode_info_context = orig_mi + x_idx + y_idx * mis; for (i = 0; i < 25; i++) { xd->block[i].eob = 0; xd->eobs[i] = 0; @@ -766,6 +791,9 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col; xd->left_context = pc->left_context; xd->mode_info_context = orig_mi; +#if CONFIG_TX32X32 + } +#endif } #endif @@ -1244,6 +1272,11 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { read_coef_probs_common(bc, pc->fc.coef_probs_16x16); read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16); } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (pbi->common.txfm_mode > ALLOW_16X16) { + read_coef_probs_common(bc, pc->fc.coef_probs_32x32); + } +#endif } int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { @@ -1433,9 +1466,16 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { /* Read the loop filter level and type */ pc->txfm_mode = vp9_read_literal(&header_bc, 2); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (pc->txfm_mode == 3) + pc->txfm_mode += vp9_read_bit(&header_bc); +#endif if (pc->txfm_mode == TX_MODE_SELECT) { pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); +#endif } pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc); @@ -1591,6 +1631,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pbi->common.fc.coef_probs_16x16); vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16, pbi->common.fc.hybrid_coef_probs_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_copy(pbi->common.fc.pre_coef_probs_32x32, + pbi->common.fc.coef_probs_32x32); +#endif vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob); #if CONFIG_SUPERBLOCKS vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob); @@ -1610,6 +1654,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8); vp9_zero(pbi->common.fc.coef_counts_16x16); vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_zero(pbi->common.fc.coef_counts_32x32); +#endif vp9_zero(pbi->common.fc.ymode_counts); #if CONFIG_SUPERBLOCKS vp9_zero(pbi->common.fc.sb_ymode_counts); diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 79114d58c..22a66716f 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -352,3 +352,30 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); } } + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +void vp9_dequant_idct_add_32x32(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *dest, int pitch, + int stride, uint16_t eobs) { + short output[1024]; + int i; + + input[0]= input[0] * dq[0] / 2; + for (i = 1; i < 1024; i++) + input[i] = input[i] * dq[1] / 2; + vp9_short_idct32x32_c(input, output, 64); + vpx_memset(input, 0, 2048); + + add_residual(output, pred, pitch, dest, stride, 32, 32); +} + +void vp9_dequant_idct_add_uv_block_16x16_c(short *q, const short *dq, + unsigned char *dstu, + unsigned char *dstv, + int stride, + unsigned short *eobs) { + 
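  /* The 1024 superblock luma coefficients are followed by the two 16x16
     chroma planes: U at q + 0 and V at q + 256, with eob counts taken from
     eobs[0] and eobs[4] of the caller's 16x16 layout (xd->eobs + 16).
     Each plane simply reuses the regular 16x16 dequant/idct path. */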
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, eobs[0]); + vp9_dequant_idct_add_16x16_c(q + 256, dq, + dstv, dstv, stride, stride, eobs[4]); +} +#endif diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 897ad5204..35a26477a 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -55,8 +55,9 @@ #define CAT5_PROB3 157 #define CAT5_PROB4 180 -static const unsigned char cat6_prob[14] = -{ 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; +static const unsigned char cat6_prob[15] = { + 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 +}; void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { /* Clear entropy contexts */ @@ -161,6 +162,12 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, coef_counts = fc->hybrid_coef_counts_16x16[type]; } break; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + case TX_32X32: + coef_probs = fc->coef_probs_32x32[type]; + coef_counts = fc->coef_counts_32x32[type]; + break; +#endif } VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); @@ -256,6 +263,54 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { return eob; } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +int vp9_decode_sb_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + BOOL_DECODER* const bc) { + ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; + ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; + unsigned short* const eobs = xd->eobs; + const int segment_id = xd->mode_info_context->mbmi.segment_id; + int c, i, eobtotal = 0, seg_eob; + + // Luma block + eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, get_eob(xd, segment_id, 1024), + xd->sb_coeff_data.qcoeff, + vp9_default_zig_zag1d_32x32, + TX_32X32, vp9_coef_bands_32x32); + A[1] = A[2] = A[3] = A[0]; + L[1] = L[2] = L[3] = L[0]; + eobtotal += c; + + // 16x16 chroma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 16; i < 24; i += 4) { + ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i]; + ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i]; + + eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, + DCT_DCT, seg_eob, + xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, + vp9_default_zig_zag1d_16x16, + TX_16X16, vp9_coef_bands_16x16); + a[1] = a[0]; + l[1] = l[0]; + eobtotal += c; + } + + // no Y2 block + vpx_memset(&A[8], 0, sizeof(A[8])); + vpx_memset(&L[8], 0, sizeof(L[8])); + + vpx_memcpy(xd->above_context + 1, xd->above_context, + sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(xd->left_context + 1, xd->left_context, + sizeof(ENTROPY_CONTEXT_PLANES)); + + return eobtotal; +} +#endif static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, MACROBLOCKD* const xd, diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 9b319d4a9..09d354ea6 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -23,6 +23,12 @@ int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, int vp9_decode_mb_tokens(VP9D_COMP* const, MACROBLOCKD* const, BOOL_DECODER* const); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +int vp9_decode_sb_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + BOOL_DECODER* const bc); +#endif + int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 0adaeee0a..847815f50 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1200,8 +1200,13 @@ 
static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { TX_SIZE sz = mi->txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); - if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) + if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) { vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (mi->encoded_as_sb && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); +#endif + } } #ifdef ENTROPY_STATS @@ -1337,8 +1342,13 @@ static void write_mb_modes_kf(const VP9_COMMON *c, TX_SIZE sz = m->mbmi.txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); - if (sz != TX_4X4 && ym <= TM_PRED) + if (sz != TX_4X4 && ym <= TM_PRED) { vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (m->mbmi.encoded_as_sb && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); +#endif + } } } @@ -1551,25 +1561,50 @@ static void build_coeff_contexts(VP9_COMP *cpi) { } } } - } - for (i = 0; i < BLOCK_TYPES_16X16; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs_16x16[i][j][k], - cpi->frame_hybrid_branch_ct_16x16[i][j][k], - cpi->hybrid_coef_counts_16x16[i][j][k], 256, 1); + for (i = 0; i < BLOCK_TYPES_16X16; ++i) { + for (j = 0; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp9_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, + cpi->frame_hybrid_coef_probs_16x16[i][j][k], + cpi->frame_hybrid_branch_ct_16x16[i][j][k], + cpi->hybrid_coef_counts_16x16[i][j][k], 256, 1); #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters_16x16[i][j][k][t] += cpi->hybrid_coef_counts_16x16[i][j][k][t]; + if (!cpi->dummy_packing) + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + hybrid_context_counters_16x16[i][j][k][t] += + cpi->hybrid_coef_counts_16x16[i][j][k][t]; #endif + } } } } + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (cpi->common.txfm_mode > ALLOW_16X16) { + for (i = 0; i < BLOCK_TYPES_32X32; ++i) { + for (j = 0; j < COEF_BANDS; ++j) { + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp9_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, + cpi->frame_coef_probs_32x32[i][j][k], + cpi->frame_branch_ct_32x32[i][j][k], + cpi->coef_counts_32x32[i][j][k], 256, 1); +#ifdef ENTROPY_STATS + if (!cpi->dummy_packing) + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) + context_counters_32x32[i][j][k][t] += + cpi->coef_counts_32x32[i][j][k][t]; +#endif + } + } + } + } +#endif } static void update_coef_probs_common( @@ -1714,6 +1749,15 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->common.fc.hybrid_coef_probs_16x16, cpi->frame_hybrid_branch_ct_16x16); } + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (cpi->common.txfm_mode > ALLOW_16X16) { + update_coef_probs_common(bc, + cpi->frame_coef_probs_32x32, + cpi->common.fc.coef_probs_32x32, + cpi->frame_branch_ct_32x32); + } +#endif } #ifdef PACKET_TESTING @@ -1955,18 +1999,53 @@ 
void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, { if (pc->txfm_mode == TX_MODE_SELECT) { - pc->prob_tx[0] = get_prob(cpi->txfm_count[0] + cpi->txfm_count_8x8p[0], - cpi->txfm_count[0] + cpi->txfm_count[1] + cpi->txfm_count[2] + - cpi->txfm_count_8x8p[0] + cpi->txfm_count_8x8p[1]); - pc->prob_tx[1] = get_prob(cpi->txfm_count[1], cpi->txfm_count[1] + cpi->txfm_count[2]); + pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] + + cpi->txfm_count_16x16p[TX_4X4] + + cpi->txfm_count_8x8p[TX_4X4], + cpi->txfm_count_32x32p[TX_4X4] + + cpi->txfm_count_32x32p[TX_8X8] + + cpi->txfm_count_32x32p[TX_16X16] + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + cpi->txfm_count_32x32p[TX_32X32] + +#endif + cpi->txfm_count_16x16p[TX_4X4] + + cpi->txfm_count_16x16p[TX_8X8] + + cpi->txfm_count_16x16p[TX_16X16] + + cpi->txfm_count_8x8p[TX_4X4] + + cpi->txfm_count_8x8p[TX_8X8]); + pc->prob_tx[1] = get_prob(cpi->txfm_count_32x32p[TX_8X8] + + cpi->txfm_count_16x16p[TX_8X8], + cpi->txfm_count_32x32p[TX_8X8] + + cpi->txfm_count_32x32p[TX_16X16] + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + cpi->txfm_count_32x32p[TX_32X32] + +#endif + cpi->txfm_count_16x16p[TX_8X8] + + cpi->txfm_count_16x16p[TX_16X16]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + pc->prob_tx[2] = get_prob(cpi->txfm_count_32x32p[TX_16X16], + cpi->txfm_count_32x32p[TX_16X16] + + cpi->txfm_count_32x32p[TX_32X32]); +#endif } else { pc->prob_tx[0] = 128; pc->prob_tx[1] = 128; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + pc->prob_tx[2] = 128; +#endif } - vp9_write_literal(&header_bc, pc->txfm_mode, 2); + vp9_write_literal(&header_bc, pc->txfm_mode <= 3 ? pc->txfm_mode : 3, 2); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (pc->txfm_mode > ALLOW_16X16) { + vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT); + } +#endif if (pc->txfm_mode == TX_MODE_SELECT) { vp9_write_literal(&header_bc, pc->prob_tx[0], 8); vp9_write_literal(&header_bc, pc->prob_tx[1], 8); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_write_literal(&header_bc, pc->prob_tx[2], 8); +#endif } } @@ -2150,6 +2229,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8); vp9_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16); vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_copy(cpi->common.fc.pre_coef_probs_32x32, + cpi->common.fc.coef_probs_32x32); +#endif #if CONFIG_SUPERBLOCKS vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob); #endif diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 4669d2ed6..82dc5edc1 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -36,9 +36,15 @@ typedef struct block { short *zbin; short *zbin_8x8; short *zbin_16x16; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + short *zbin_32x32; +#endif short *zrun_zbin_boost; short *zrun_zbin_boost_8x8; short *zrun_zbin_boost_16x16; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + short *zrun_zbin_boost_32x32; +#endif short *round; // Zbin Over Quant value @@ -52,6 +58,9 @@ typedef struct block { int eob_max_offset; int eob_max_offset_8x8; int eob_max_offset_16x16; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + int eob_max_offset_32x32; +#endif } BLOCK; typedef struct { @@ -83,6 +92,13 @@ typedef struct { int64_t txfm_rd_diff[NB_TXFM_MODES]; } PICK_MODE_CONTEXT; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +typedef struct superblock { + DECLARE_ALIGNED(16, 
short, src_diff[32*32+16*16*2]); + DECLARE_ALIGNED(16, short, coeff[32*32+16*16*2]); +} SUPERBLOCK; +#endif + typedef struct macroblock { DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y @@ -95,6 +111,10 @@ typedef struct macroblock { // 1 DC 2nd order block each with 16 entries BLOCK block[25]; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + SUPERBLOCK sb_coeff_data; +#endif + YV12_BUFFER_CONFIG src; MACROBLOCKD e_mbd; @@ -153,9 +173,9 @@ typedef struct macroblock { unsigned char *active_ptr; - unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] + unsigned int token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - unsigned int hybrid_token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS] + unsigned int hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; int optimize; diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 6753f2462..0fc8fa35e 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -1330,3 +1330,461 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { #undef RIGHT_SHIFT #undef ROUNDING #endif + +#if CONFIG_TX32X32 +#if !CONFIG_DWT32X32HYBRID +static void dct32_1d(double *input, double *output, int stride) { + static const double C1 = 0.998795456205; // cos(pi * 1 / 64) + static const double C2 = 0.995184726672; // cos(pi * 2 / 64) + static const double C3 = 0.989176509965; // cos(pi * 3 / 64) + static const double C4 = 0.980785280403; // cos(pi * 4 / 64) + static const double C5 = 0.970031253195; // cos(pi * 5 / 64) + static const double C6 = 0.956940335732; // cos(pi * 6 / 64) + static const double C7 = 0.941544065183; // cos(pi * 7 / 64) + static const double C8 = 0.923879532511; // cos(pi * 8 / 64) + static const double C9 = 0.903989293123; // cos(pi * 9 / 64) + static const double C10 = 0.881921264348; // cos(pi * 10 / 64) + static const double C11 = 0.857728610000; // cos(pi * 11 / 64) + static const double C12 = 0.831469612303; // cos(pi * 12 / 64) + static const double C13 = 0.803207531481; // cos(pi * 13 / 64) + static const double C14 = 0.773010453363; // cos(pi * 14 / 64) + static const double C15 = 0.740951125355; // cos(pi * 15 / 64) + static const double C16 = 0.707106781187; // cos(pi * 16 / 64) + static const double C17 = 0.671558954847; // cos(pi * 17 / 64) + static const double C18 = 0.634393284164; // cos(pi * 18 / 64) + static const double C19 = 0.595699304492; // cos(pi * 19 / 64) + static const double C20 = 0.555570233020; // cos(pi * 20 / 64) + static const double C21 = 0.514102744193; // cos(pi * 21 / 64) + static const double C22 = 0.471396736826; // cos(pi * 22 / 64) + static const double C23 = 0.427555093430; // cos(pi * 23 / 64) + static const double C24 = 0.382683432365; // cos(pi * 24 / 64) + static const double C25 = 0.336889853392; // cos(pi * 25 / 64) + static const double C26 = 0.290284677254; // cos(pi * 26 / 64) + static const double C27 = 0.242980179903; // cos(pi * 27 / 64) + static const double C28 = 0.195090322016; // cos(pi * 28 / 64) + static const double C29 = 0.146730474455; // cos(pi * 29 / 64) + static const double C30 = 0.098017140330; // cos(pi * 30 / 64) + static const double C31 = 0.049067674327; // cos(pi * 31 / 64) + + double step[32]; + + // Stage 1 + step[0] = input[stride*0] + input[stride*(32 - 1)]; + step[1] = input[stride*1] + input[stride*(32 - 2)]; + step[2] = input[stride*2] + 
input[stride*(32 - 3)]; + step[3] = input[stride*3] + input[stride*(32 - 4)]; + step[4] = input[stride*4] + input[stride*(32 - 5)]; + step[5] = input[stride*5] + input[stride*(32 - 6)]; + step[6] = input[stride*6] + input[stride*(32 - 7)]; + step[7] = input[stride*7] + input[stride*(32 - 8)]; + step[8] = input[stride*8] + input[stride*(32 - 9)]; + step[9] = input[stride*9] + input[stride*(32 - 10)]; + step[10] = input[stride*10] + input[stride*(32 - 11)]; + step[11] = input[stride*11] + input[stride*(32 - 12)]; + step[12] = input[stride*12] + input[stride*(32 - 13)]; + step[13] = input[stride*13] + input[stride*(32 - 14)]; + step[14] = input[stride*14] + input[stride*(32 - 15)]; + step[15] = input[stride*15] + input[stride*(32 - 16)]; + step[16] = -input[stride*16] + input[stride*(32 - 17)]; + step[17] = -input[stride*17] + input[stride*(32 - 18)]; + step[18] = -input[stride*18] + input[stride*(32 - 19)]; + step[19] = -input[stride*19] + input[stride*(32 - 20)]; + step[20] = -input[stride*20] + input[stride*(32 - 21)]; + step[21] = -input[stride*21] + input[stride*(32 - 22)]; + step[22] = -input[stride*22] + input[stride*(32 - 23)]; + step[23] = -input[stride*23] + input[stride*(32 - 24)]; + step[24] = -input[stride*24] + input[stride*(32 - 25)]; + step[25] = -input[stride*25] + input[stride*(32 - 26)]; + step[26] = -input[stride*26] + input[stride*(32 - 27)]; + step[27] = -input[stride*27] + input[stride*(32 - 28)]; + step[28] = -input[stride*28] + input[stride*(32 - 29)]; + step[29] = -input[stride*29] + input[stride*(32 - 30)]; + step[30] = -input[stride*30] + input[stride*(32 - 31)]; + step[31] = -input[stride*31] + input[stride*(32 - 32)]; + + // Stage 2 + output[stride*0] = step[0] + step[16 - 1]; + output[stride*1] = step[1] + step[16 - 2]; + output[stride*2] = step[2] + step[16 - 3]; + output[stride*3] = step[3] + step[16 - 4]; + output[stride*4] = step[4] + step[16 - 5]; + output[stride*5] = step[5] + step[16 - 6]; + output[stride*6] = step[6] + step[16 - 7]; + output[stride*7] = step[7] + step[16 - 8]; + output[stride*8] = -step[8] + step[16 - 9]; + output[stride*9] = -step[9] + step[16 - 10]; + output[stride*10] = -step[10] + step[16 - 11]; + output[stride*11] = -step[11] + step[16 - 12]; + output[stride*12] = -step[12] + step[16 - 13]; + output[stride*13] = -step[13] + step[16 - 14]; + output[stride*14] = -step[14] + step[16 - 15]; + output[stride*15] = -step[15] + step[16 - 16]; + + output[stride*16] = step[16]; + output[stride*17] = step[17]; + output[stride*18] = step[18]; + output[stride*19] = step[19]; + + output[stride*20] = (-step[20] + step[27])*C16; + output[stride*21] = (-step[21] + step[26])*C16; + output[stride*22] = (-step[22] + step[25])*C16; + output[stride*23] = (-step[23] + step[24])*C16; + + output[stride*24] = (step[24] + step[23])*C16; + output[stride*25] = (step[25] + step[22])*C16; + output[stride*26] = (step[26] + step[21])*C16; + output[stride*27] = (step[27] + step[20])*C16; + + output[stride*28] = step[28]; + output[stride*29] = step[29]; + output[stride*30] = step[30]; + output[stride*31] = step[31]; + + // Stage 3 + step[0] = output[stride*0] + output[stride*(8 - 1)]; + step[1] = output[stride*1] + output[stride*(8 - 2)]; + step[2] = output[stride*2] + output[stride*(8 - 3)]; + step[3] = output[stride*3] + output[stride*(8 - 4)]; + step[4] = -output[stride*4] + output[stride*(8 - 5)]; + step[5] = -output[stride*5] + output[stride*(8 - 6)]; + step[6] = -output[stride*6] + output[stride*(8 - 7)]; + step[7] = -output[stride*7] + output[stride*(8 - 8)]; 
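  // Stage 3, continued: indices 10..13 below are rotated through
  // C16 = cos(pi/4) = 1/sqrt(2), and each group of eight in 16..31 is
  // folded into mirrored sum/difference pairs.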
+ step[8] = output[stride*8]; + step[9] = output[stride*9]; + step[10] = (-output[stride*10] + output[stride*13])*C16; + step[11] = (-output[stride*11] + output[stride*12])*C16; + step[12] = (output[stride*12] + output[stride*11])*C16; + step[13] = (output[stride*13] + output[stride*10])*C16; + step[14] = output[stride*14]; + step[15] = output[stride*15]; + + step[16] = output[stride*16] + output[stride*23]; + step[17] = output[stride*17] + output[stride*22]; + step[18] = output[stride*18] + output[stride*21]; + step[19] = output[stride*19] + output[stride*20]; + step[20] = -output[stride*20] + output[stride*19]; + step[21] = -output[stride*21] + output[stride*18]; + step[22] = -output[stride*22] + output[stride*17]; + step[23] = -output[stride*23] + output[stride*16]; + step[24] = -output[stride*24] + output[stride*31]; + step[25] = -output[stride*25] + output[stride*30]; + step[26] = -output[stride*26] + output[stride*29]; + step[27] = -output[stride*27] + output[stride*28]; + step[28] = output[stride*28] + output[stride*27]; + step[29] = output[stride*29] + output[stride*26]; + step[30] = output[stride*30] + output[stride*25]; + step[31] = output[stride*31] + output[stride*24]; + + // Stage 4 + output[stride*0] = step[0] + step[3]; + output[stride*1] = step[1] + step[2]; + output[stride*2] = -step[2] + step[1]; + output[stride*3] = -step[3] + step[0]; + output[stride*4] = step[4]; + output[stride*5] = (-step[5] + step[6])*C16; + output[stride*6] = (step[6] + step[5])*C16; + output[stride*7] = step[7]; + output[stride*8] = step[8] + step[11]; + output[stride*9] = step[9] + step[10]; + output[stride*10] = -step[10] + step[9]; + output[stride*11] = -step[11] + step[8]; + output[stride*12] = -step[12] + step[15]; + output[stride*13] = -step[13] + step[14]; + output[stride*14] = step[14] + step[13]; + output[stride*15] = step[15] + step[12]; + + output[stride*16] = step[16]; + output[stride*17] = step[17]; + output[stride*18] = step[18]*-C8 + step[29]*C24; + output[stride*19] = step[19]*-C8 + step[28]*C24; + output[stride*20] = step[20]*-C24 + step[27]*-C8; + output[stride*21] = step[21]*-C24 + step[26]*-C8; + output[stride*22] = step[22]; + output[stride*23] = step[23]; + output[stride*24] = step[24]; + output[stride*25] = step[25]; + output[stride*26] = step[26]*C24 + step[21]*-C8; + output[stride*27] = step[27]*C24 + step[20]*-C8; + output[stride*28] = step[28]*C8 + step[19]*C24; + output[stride*29] = step[29]*C8 + step[18]*C24; + output[stride*30] = step[30]; + output[stride*31] = step[31]; + + // Stage 5 + step[0] = (output[stride*0] + output[stride*1]) * C16; + step[1] = (-output[stride*1] + output[stride*0]) * C16; + step[2] = output[stride*2]*C24 + output[stride*3] * C8; + step[3] = output[stride*3]*C24 - output[stride*2] * C8; + step[4] = output[stride*4] + output[stride*5]; + step[5] = -output[stride*5] + output[stride*4]; + step[6] = -output[stride*6] + output[stride*7]; + step[7] = output[stride*7] + output[stride*6]; + step[8] = output[stride*8]; + step[9] = output[stride*9]*-C8 + output[stride*14]*C24; + step[10] = output[stride*10]*-C24 + output[stride*13]*-C8; + step[11] = output[stride*11]; + step[12] = output[stride*12]; + step[13] = output[stride*13]*C24 + output[stride*10]*-C8; + step[14] = output[stride*14]*C8 + output[stride*9]*C24; + step[15] = output[stride*15]; + + step[16] = output[stride*16] + output[stride*19]; + step[17] = output[stride*17] + output[stride*18]; + step[18] = -output[stride*18] + output[stride*17]; + step[19] = -output[stride*19] + 
output[stride*16]; + step[20] = -output[stride*20] + output[stride*23]; + step[21] = -output[stride*21] + output[stride*22]; + step[22] = output[stride*22] + output[stride*21]; + step[23] = output[stride*23] + output[stride*20]; + step[24] = output[stride*24] + output[stride*27]; + step[25] = output[stride*25] + output[stride*26]; + step[26] = -output[stride*26] + output[stride*25]; + step[27] = -output[stride*27] + output[stride*24]; + step[28] = -output[stride*28] + output[stride*31]; + step[29] = -output[stride*29] + output[stride*30]; + step[30] = output[stride*30] + output[stride*29]; + step[31] = output[stride*31] + output[stride*28]; + + // Stage 6 + output[stride*0] = step[0]; + output[stride*1] = step[1]; + output[stride*2] = step[2]; + output[stride*3] = step[3]; + output[stride*4] = step[4]*C28 + step[7]*C4; + output[stride*5] = step[5]*C12 + step[6]*C20; + output[stride*6] = step[6]*C12 + step[5]*-C20; + output[stride*7] = step[7]*C28 + step[4]*-C4; + output[stride*8] = step[8] + step[9]; + output[stride*9] = -step[9] + step[8]; + output[stride*10] = -step[10] + step[11]; + output[stride*11] = step[11] + step[10]; + output[stride*12] = step[12] + step[13]; + output[stride*13] = -step[13] + step[12]; + output[stride*14] = -step[14] + step[15]; + output[stride*15] = step[15] + step[14]; + + output[stride*16] = step[16]; + output[stride*17] = step[17]*-C4 + step[30]*C28; + output[stride*18] = step[18]*-C28 + step[29]*-C4; + output[stride*19] = step[19]; + output[stride*20] = step[20]; + output[stride*21] = step[21]*-C20 + step[26]*C12; + output[stride*22] = step[22]*-C12 + step[25]*-C20; + output[stride*23] = step[23]; + output[stride*24] = step[24]; + output[stride*25] = step[25]*C12 + step[22]*-C20; + output[stride*26] = step[26]*C20 + step[21]*C12; + output[stride*27] = step[27]; + output[stride*28] = step[28]; + output[stride*29] = step[29]*C28 + step[18]*-C4; + output[stride*30] = step[30]*C4 + step[17]*C28; + output[stride*31] = step[31]; + + // Stage 7 + step[0] = output[stride*0]; + step[1] = output[stride*1]; + step[2] = output[stride*2]; + step[3] = output[stride*3]; + step[4] = output[stride*4]; + step[5] = output[stride*5]; + step[6] = output[stride*6]; + step[7] = output[stride*7]; + step[8] = output[stride*8]*C30 + output[stride*15]*C2; + step[9] = output[stride*9]*C14 + output[stride*14]*C18; + step[10] = output[stride*10]*C22 + output[stride*13]*C10; + step[11] = output[stride*11]*C6 + output[stride*12]*C26; + step[12] = output[stride*12]*C6 + output[stride*11]*-C26; + step[13] = output[stride*13]*C22 + output[stride*10]*-C10; + step[14] = output[stride*14]*C14 + output[stride*9]*-C18; + step[15] = output[stride*15]*C30 + output[stride*8]*-C2; + + step[16] = output[stride*16] + output[stride*17]; + step[17] = -output[stride*17] + output[stride*16]; + step[18] = -output[stride*18] + output[stride*19]; + step[19] = output[stride*19] + output[stride*18]; + step[20] = output[stride*20] + output[stride*21]; + step[21] = -output[stride*21] + output[stride*20]; + step[22] = -output[stride*22] + output[stride*23]; + step[23] = output[stride*23] + output[stride*22]; + step[24] = output[stride*24] + output[stride*25]; + step[25] = -output[stride*25] + output[stride*24]; + step[26] = -output[stride*26] + output[stride*27]; + step[27] = output[stride*27] + output[stride*26]; + step[28] = output[stride*28] + output[stride*29]; + step[29] = -output[stride*29] + output[stride*28]; + step[30] = -output[stride*30] + output[stride*31]; + step[31] = output[stride*31] + 
output[stride*30]; + + // Final stage --- outputs indices are bit-reversed. + output[stride*0] = step[0]; + output[stride*16] = step[1]; + output[stride*8] = step[2]; + output[stride*24] = step[3]; + output[stride*4] = step[4]; + output[stride*20] = step[5]; + output[stride*12] = step[6]; + output[stride*28] = step[7]; + output[stride*2] = step[8]; + output[stride*18] = step[9]; + output[stride*10] = step[10]; + output[stride*26] = step[11]; + output[stride*6] = step[12]; + output[stride*22] = step[13]; + output[stride*14] = step[14]; + output[stride*30] = step[15]; + + output[stride*1] = step[16]*C31 + step[31]*C1; + output[stride*17] = step[17]*C15 + step[30]*C17; + output[stride*9] = step[18]*C23 + step[29]*C9; + output[stride*25] = step[19]*C7 + step[28]*C25; + output[stride*5] = step[20]*C27 + step[27]*C5; + output[stride*21] = step[21]*C11 + step[26]*C21; + output[stride*13] = step[22]*C19 + step[25]*C13; + output[stride*29] = step[23]*C3 + step[24]*C29; + output[stride*3] = step[24]*C3 + step[23]*-C29; + output[stride*19] = step[25]*C19 + step[22]*-C13; + output[stride*11] = step[26]*C11 + step[21]*-C21; + output[stride*27] = step[27]*C27 + step[20]*-C5; + output[stride*7] = step[28]*C7 + step[19]*-C25; + output[stride*23] = step[29]*C23 + step[18]*-C9; + output[stride*15] = step[30]*C15 + step[17]*-C17; + output[stride*31] = step[31]*C31 + step[16]*-C1; +} + +void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + int shortpitch = pitch >> 1; + int i, j; + double output[1024]; + // First transform columns + for (i = 0; i < 32; i++) { + double temp_in[32], temp_out[32]; + for (j = 0; j < 32; j++) + temp_in[j] = input[j*shortpitch + i]; + dct32_1d(temp_in, temp_out, 1); + for (j = 0; j < 32; j++) + output[j*32 + i] = temp_out[j]; + } + // Then transform rows + for (i = 0; i < 32; ++i) { + double temp_in[32], temp_out[32]; + for (j = 0; j < 32; ++j) + temp_in[j] = output[j + i*32]; + dct32_1d(temp_in, temp_out, 1); + for (j = 0; j < 32; ++j) + output[j + i*32] = temp_out[j]; + } + // Scale by some magic number + for (i = 0; i < 1024; i++) { + out[i] = (short)round(output[i]/4); + } + } + + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} + +#else // CONFIG_DWT32X32HYBRID + +#define MAX_BLOCK_LENGTH 64 +#define ENH_PRECISION_BITS 1 +#define ENH_PRECISION_RND ((1 << ENH_PRECISION_BITS) / 2) + +// Note: block length must be even for this implementation +static void analysis_53_row(int length, short *x, + short *lowpass, short *highpass) { + int n; + short r, * a, * b; + + n = length >> 1; + b = highpass; + a = lowpass; + while (--n) { + *a++ = (r = *x++) << 1; + *b++ = *x - ((r + x[1] + 1) >> 1); + x++; + } + *a = (r = *x++) << 1; + *b = *x - r; + + n = length >> 1; + b = highpass; + a = lowpass; + r = *highpass; + while (n--) { + *a++ += (r + (*b) + 1) >> 1; + r = *b++; + } +} + +static void analysis_53_col(int length, short *x, + short *lowpass, short *highpass) { + int n; + short r, * a, * b; + + n = length >> 1; + b = highpass; + a = lowpass; + while (--n) { + *a++ = (r = *x++); + *b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2; + x++; + } + *a = (r = *x++); + *b = (*x - r + 1) >> 1; + + n = length >> 1; + b = highpass; + a = lowpass; + r = *highpass; + while (n--) { + *a++ += (r + (*b) + 1) >> 1; + r = *b++; + } +} + +// NOTE: Using a 5/3 integer wavelet for now. 
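// (In lifting terms the row pass above computes
//    h[i] = x[2i+1] - ((x[2i] + x[2i+2] + 1) >> 1)            // predict
//    l[i] = 2*x[2i] + ((h[i-1] + h[i] + 1) >> 1), h[-1]=h[0]  // update
//  so the even samples carry one extra precision bit; the column variant
//  keeps them unscaled and halves its highpass output instead.)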
Explore using a wavelet +// with a better response later +static void dyadic_analyze(int levels, int width, int height, + short *x, int pitch_x, short *c, int pitch_c) { + int lv, i, j, nh, nw, hh = height, hw = width; + short buffer[2 * MAX_BLOCK_LENGTH]; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + c[i * pitch_c + j] = x[i * pitch_x + j] << ENH_PRECISION_BITS; + } + } + for (lv = 0; lv < levels; lv++) { + nh = hh; + hh = (hh + 1) >> 1; + nw = hw; + hw = (hw + 1) >> 1; + if ((nh < 2) || (nw < 2)) return; + for (i = 0; i < nh; i++) { + memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); + analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); + } + for (j = 0; j < nw; j++) { + for (i = 0; i < nh; i++) + buffer[i + nh] = c[i * pitch_c + j]; + analysis_53_col(nh, buffer + nh, buffer, buffer + hh); + for (i = 0; i < nh; i++) + c[i * pitch_c + j] = buffer[i]; + } + } +} + +void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { + // assume out is a 32x32 buffer + short buffer[16 * 16]; + int i; + const int short_pitch = pitch >> 1; + dyadic_analyze(1, 32, 32, input, short_pitch, out, 32); + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the dct16x16 function + vp9_short_fdct16x16_c(out, buffer, 64); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); +} +#endif // CONFIG_DWT32X32HYBRID +#endif // CONFIG_TX32X32 diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 6ab23cae0..f504fc53c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -456,6 +456,10 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, if (xd->mb_to_right_edge >= 0) vpx_memcpy(xd->mode_info_context + mis + 1, mi, sizeof(MODE_INFO)); } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else { + ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; +#endif } #endif @@ -1487,6 +1491,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(cpi->hybrid_coef_counts_8x8); vp9_zero(cpi->coef_counts_16x16); vp9_zero(cpi->hybrid_coef_counts_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_zero(cpi->coef_counts_32x32); +#endif vp9_frame_init_quantizer(cpi); @@ -1507,7 +1514,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff)); vpx_memset(cpi->single_pred_count, 0, sizeof(cpi->single_pred_count)); vpx_memset(cpi->comp_pred_count, 0, sizeof(cpi->comp_pred_count)); - vpx_memset(cpi->txfm_count, 0, sizeof(cpi->txfm_count)); + vpx_memset(cpi->txfm_count_32x32p, 0, sizeof(cpi->txfm_count_32x32p)); + vpx_memset(cpi->txfm_count_16x16p, 0, sizeof(cpi->txfm_count_16x16p)); vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p)); vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff)); { @@ -1700,7 +1708,11 @@ void vp9_encode_frame(VP9_COMP *cpi) { * keyframe's probabilities as an estimate of what the current keyframe's * coefficient cost distributions may look like. 
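 * With TX32X32 and superblocks enabled, that default is raised to
 * ALLOW_32X32 below, so keyframe cost estimates cover the 32x32 size too.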
*/ if (frame_type == 0) { +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + txfm_type = ALLOW_32X32; +#else txfm_type = ALLOW_16X16; +#endif } else #if 0 /* FIXME (rbultje) @@ -1731,9 +1743,15 @@ void vp9_encode_frame(VP9_COMP *cpi) { } else txfm_type = ALLOW_8X8; #else - txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >= +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >= cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? + ALLOW_32X32 : TX_MODE_SELECT; +#else + txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >= + cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? ALLOW_16X16 : TX_MODE_SELECT; +#endif #endif cpi->common.txfm_mode = txfm_type; if (txfm_type != TX_MODE_SELECT) { @@ -1753,7 +1771,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { int64_t pd = cpi->rd_tx_select_diff[i]; int diff; if (i == TX_MODE_SELECT) - pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZE_MAX - 1), 0); + pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, + 2048 * (TX_SIZE_MAX_SB - 1), 0); diff = (int)(pd / cpi->common.MBs); cpi->rd_tx_select_threshes[frame_type][i] += diff; cpi->rd_tx_select_threshes[frame_type][i] /= 2; @@ -1776,19 +1795,37 @@ void vp9_encode_frame(VP9_COMP *cpi) { } if (cpi->common.txfm_mode == TX_MODE_SELECT) { - const int count4x4 = cpi->txfm_count[TX_4X4] + cpi->txfm_count_8x8p[TX_4X4]; - const int count8x8 = cpi->txfm_count[TX_8X8]; + const int count4x4 = cpi->txfm_count_16x16p[TX_4X4] + + cpi->txfm_count_32x32p[TX_4X4] + + cpi->txfm_count_8x8p[TX_4X4]; + const int count8x8_lp = cpi->txfm_count_32x32p[TX_8X8] + + cpi->txfm_count_16x16p[TX_8X8]; const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8]; - const int count16x16 = cpi->txfm_count[TX_16X16]; + const int count16x16_16x16p = cpi->txfm_count_16x16p[TX_16X16]; + const int count16x16_lp = cpi->txfm_count_32x32p[TX_16X16]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + const int count32x32 = cpi->txfm_count_32x32p[TX_32X32]; +#else + const int count32x32 = 0; +#endif - if (count4x4 == 0 && count16x16 == 0) { + if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && + count32x32 == 0) { cpi->common.txfm_mode = ALLOW_8X8; reset_skip_txfm_size(cpi, TX_8X8); - } else if (count8x8 == 0 && count16x16 == 0 && count8x8_8x8p == 0) { + } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && + count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { cpi->common.txfm_mode = ONLY_4X4; reset_skip_txfm_size(cpi, TX_4X4); - } else if (count8x8 == 0 && count4x4 == 0) { +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { + cpi->common.txfm_mode = ALLOW_32X32; +#endif + } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { cpi->common.txfm_mode = ALLOW_16X16; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + reset_skip_txfm_size(cpi, TX_16X16); +#endif } } } else { @@ -2087,6 +2124,7 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); } + assert(mbmi->txfm_size <= TX_16X16); if (mbmi->ref_frame == INTRA_FRAME) { #ifdef ENC_DEBUG if (enc_debug) { @@ -2266,7 +2304,7 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) { if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV) { - cpi->txfm_count[mbmi->txfm_size]++; + cpi->txfm_count_16x16p[mbmi->txfm_size]++; } else if (mbmi->mode == I8X8_PRED || (mbmi->mode == SPLITMV && 
mbmi->partitioning != PARTITIONING_4X4)) { @@ -2308,6 +2346,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, MODE_INFO *mi = x->e_mbd.mode_info_context; unsigned int segment_id = mi->mbmi.segment_id; ENTROPY_CONTEXT_PLANES ta[4], tl[4]; + const int mis = cm->mode_info_stride; x->skip = 0; @@ -2397,6 +2436,53 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.y_stride, xd->dst.uv_stride); } +#if CONFIG_TX32X32 + if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { + vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, + dst, dst_y_stride); + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + vp9_transform_sby_32x32(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_32x32(x); + vp9_quantize_sbuv_16x16(x); + // TODO(rbultje): trellis optimize + vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); + vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); + vp9_recon_sby_s_c(&x->e_mbd, dst); + vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst); + + if (!x->skip) { + vp9_tokenize_sb(cpi, &x->e_mbd, t, 0); + } else { + int mb_skip_context = + cpi->common.mb_no_coeff_skip ? + (mi - 1)->mbmi.mb_skip_coeff + + (mi - mis)->mbmi.mb_skip_coeff : + 0; + mi->mbmi.mb_skip_coeff = 1; + if (cm->mb_no_coeff_skip) { + cpi->skip_true_count[mb_skip_context]++; + vp9_fix_contexts_sb(xd); + } else { + vp9_stuff_sb(cpi, xd, t, 0); + cpi->skip_false_count[mb_skip_context]++; + } + } + + // copy skip flag on all mb_mode_info contexts in this SB + // if this was a skip at this txfm size + if (mb_col < cm->mb_cols - 1) + mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + if (mb_row < cm->mb_rows - 1) { + mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + if (mb_col < cm->mb_cols - 1) + mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + } + skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff; + } else { +#endif for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; @@ -2405,7 +2491,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, memcpy(&ta[n], xd->above_context, sizeof(ta[n])); memcpy(&tl[n], xd->left_context, sizeof(tl[n])); tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride; + xd->mode_info_context = mi + x_idx + y_idx * mis; vp9_subtract_mby_s_c(x->src_diff, src + x_idx * 16 + y_idx * 16 * src_y_stride, @@ -2433,7 +2519,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, int mb_skip_context = cpi->common.mb_no_coeff_skip ? (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff : + (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0; xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; if (cpi->common.mb_no_coeff_skip) { @@ -2450,20 +2536,29 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, xd->mode_info_context = mi; update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); +#if CONFIG_TX32X32 + } +#endif if (cm->txfm_mode == TX_MODE_SELECT && !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { - cpi->txfm_count[mi->mbmi.txfm_size]++; + cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_16X16 : cm->txfm_mode; + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
+#if CONFIG_TX32X32 + TX_32X32 : +#else + TX_16X16 : +#endif + cm->txfm_mode; mi->mbmi.txfm_size = sz; if (mb_col < cm->mb_cols - 1) mi[1].mbmi.txfm_size = sz; if (mb_row < cm->mb_rows - 1) { - mi[cm->mode_info_stride].mbmi.txfm_size = sz; + mi[mis].mbmi.txfm_size = sz; if (mb_col < cm->mb_cols - 1) - mi[cm->mode_info_stride + 1].mbmi.txfm_size = sz; + mi[mis + 1].mbmi.txfm_size = sz; } } } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 091f2f0fe..46087c28e 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -108,6 +108,52 @@ void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride, } } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride, + const unsigned char *pred, int dst_stride) { + int r, c; + + for (r = 0; r < 32; r++) { + for (c = 0; c < 32; c++) { + diff[c] = src[c] - pred[c]; + } + + diff += 32; + pred += dst_stride; + src += src_stride; + } +} + +void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc, + const unsigned char *vsrc, int src_stride, + const unsigned char *upred, + const unsigned char *vpred, int dst_stride) { + short *udiff = diff + 1024; + short *vdiff = diff + 1024 + 256; + int r, c; + + for (r = 0; r < 16; r++) { + for (c = 0; c < 16; c++) { + udiff[c] = usrc[c] - upred[c]; + } + + udiff += 16; + upred += dst_stride; + usrc += src_stride; + } + + for (r = 0; r < 16; r++) { + for (c = 0; c < 16; c++) { + vdiff[c] = vsrc[c] - vpred[c]; + } + + vdiff += 16; + vpred += dst_stride; + vsrc += src_stride; + } +} +#endif + void vp9_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride) { vp9_subtract_mby_s_c(diff, src, stride, pred, 16); @@ -265,6 +311,22 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) { vp9_transform_mbuv_8x8(x); } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +void vp9_transform_sby_32x32(MACROBLOCK *x) { + SUPERBLOCK * const x_sb = &x->sb_coeff_data; + vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64); +} + +void vp9_transform_sbuv_16x16(MACROBLOCK *x) { + SUPERBLOCK * const x_sb = &x->sb_coeff_data; + vp9_clear_system_state(); + x->vp9_short_fdct16x16(x_sb->src_diff + 1024, + x_sb->coeff + 1024, 32); + x->vp9_short_fdct16x16(x_sb->src_diff + 1280, + x_sb->coeff + 1280, 32); +} +#endif + #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) #define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp9_token_state vp9_token_state; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 4f49647a2..3c0a0a5a2 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -47,6 +47,11 @@ void vp9_transform_mb_16x16(MACROBLOCK *mb); void vp9_transform_mby_16x16(MACROBLOCK *x); void vp9_optimize_mby_16x16(MACROBLOCK *x); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +void vp9_transform_sby_32x32(MACROBLOCK *x); +void vp9_transform_sbuv_16x16(MACROBLOCK *x); +#endif + void vp9_fidct_mb(MACROBLOCK *x); void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); @@ -59,6 +64,14 @@ void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc, void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride, const unsigned char *pred, int dst_stride); +#if CONFIG_TX32X32 +void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride, + const unsigned char *pred, int dst_stride); +void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc, + const unsigned char *vsrc, int src_stride, + const unsigned char 
*upred, + const unsigned char *vpred, int dst_stride); +#endif #endif #endif diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 0fe7a14b6..779534bac 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1810,7 +1810,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { #endif for (i = 0; i < COMP_PRED_CONTEXTS; i++) cm->prob_comppred[i] = 128; - for (i = 0; i < TX_SIZE_MAX - 1; i++) + for (i = 0; i < TX_SIZE_MAX_SB - 1; i++) cm->prob_tx[i] = 128; // Prime the recent reference frame useage counters. @@ -3698,6 +3698,9 @@ static void encode_frame_to_data_rate vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16, cpi->hybrid_coef_counts_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32); +#endif vp9_adapt_coef_probs(&cpi->common); if (cpi->common.frame_type != KEY_FRAME) { #if CONFIG_SUPERBLOCKS diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 511e62f1c..28acc96d4 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -109,6 +109,11 @@ typedef struct { vp9_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_prob coef_probs_32x32[BLOCK_TYPES_32X32] + [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; +#endif + #if CONFIG_SUPERBLOCKS vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; #endif @@ -435,6 +440,15 @@ typedef struct VP9_COMP { DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + DECLARE_ALIGNED(16, short, Y1zbin_32x32[QINDEX_RANGE][1024]); + DECLARE_ALIGNED(16, short, Y2zbin_32x32[QINDEX_RANGE][1024]); + DECLARE_ALIGNED(16, short, UVzbin_32x32[QINDEX_RANGE][1024]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_32x32[QINDEX_RANGE][1024]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_32x32[QINDEX_RANGE][1024]); + DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_32x32[QINDEX_RANGE][1024]); +#endif + MACROBLOCK mb; VP9_COMMON common; VP9_CONFIG oxcf; @@ -483,8 +497,9 @@ typedef struct VP9_COMP { int comp_pred_count[COMP_PRED_CONTEXTS]; int single_pred_count[COMP_PRED_CONTEXTS]; // FIXME contextualize - int txfm_count[TX_SIZE_MAX]; - int txfm_count_8x8p[TX_SIZE_MAX - 1]; + int txfm_count_32x32p[TX_SIZE_MAX_SB]; + int txfm_count_16x16p[TX_SIZE_MAX_MB]; + int txfm_count_8x8p[TX_SIZE_MAX_MB - 1]; int64_t rd_tx_select_diff[NB_TXFM_MODES]; int rd_tx_select_threshes[4][NB_TXFM_MODES]; @@ -604,6 +619,12 @@ typedef struct VP9_COMP { vp9_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + unsigned int coef_counts_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + vp9_prob frame_coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#endif + int gfu_boost; int last_boost; int kf_boost; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index f160edb56..fcc7d2948 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ 
-323,28 +323,25 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x) { vp9_quantize_mbuv_8x8(x); } -void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) { +static void quantize(short *zbin_boost_orig_ptr, + short *coeff_ptr, int n_coeffs, int max_coeffs, + short *zbin_ptr, short *round_ptr, short *quant_ptr, + unsigned char *quant_shift_ptr, + short *qcoeff_ptr, short *dqcoeff_ptr, + short *dequant_ptr, short zbin_oq_value, + int *eob_ptr, const int *scan, int mul) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost_16x16; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin_16x16; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; + short *zbin_boost_ptr = zbin_boost_orig_ptr; - vpx_memset(qcoeff_ptr, 0, 256*sizeof(short)); - vpx_memset(dqcoeff_ptr, 0, 256*sizeof(short)); + vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(short)); eob = -1; - for (i = 0; i < b->eob_max_offset_16x16; i++) { - rc = vp9_default_zig_zag1d_16x16[i]; - z = coeff_ptr[rc]; + for (i = 0; i < max_coeffs; i++) { + rc = scan[i]; + z = coeff_ptr[rc] * mul; zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); zbin_boost_ptr ++; @@ -354,22 +351,70 @@ void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) { if (x >= zbin) { x += (round_ptr[rc!=0]); - y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x)) + y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) >> quant_shift_ptr[rc!=0]; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / mul; // dequantized value if (y) { eob = i; // last nonzero coeffs - zbin_boost_ptr = b->zrun_zbin_boost_16x16; + zbin_boost_ptr = zbin_boost_orig_ptr; } } } - d->eob = eob + 1; + *eob_ptr = eob + 1; } +void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) { + quantize(b->zrun_zbin_boost_16x16, + b->coeff, + 256, b->eob_max_offset_16x16, + b->zbin_16x16, b->round, b->quant, b->quant_shift, + d->qcoeff, + d->dqcoeff, + d->dequant, + b->zbin_extra, + &d->eob, vp9_default_zig_zag1d_16x16, 1); +} + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +void vp9_quantize_sby_32x32(MACROBLOCK *x) { + x->e_mbd.block[0].eob = 0; + quantize(x->block[0].zrun_zbin_boost_32x32, + x->sb_coeff_data.coeff, + 1024, x->block[0].eob_max_offset_32x32, + x->block[0].zbin_32x32, + x->block[0].round, x->block[0].quant, x->block[0].quant_shift, + x->e_mbd.sb_coeff_data.qcoeff, + x->e_mbd.sb_coeff_data.dqcoeff, + x->e_mbd.block[0].dequant, + x->block[0].zbin_extra, + &x->e_mbd.block[0].eob, + vp9_default_zig_zag1d_32x32, 2); +} + +void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { + int i; + + x->e_mbd.block[16].eob = 0; + x->e_mbd.block[20].eob = 0; + for (i = 16; i < 24; i += 4) + quantize(x->block[i].zrun_zbin_boost_16x16, + x->sb_coeff_data.coeff + 1024 + (i - 16) * 64, + 256, x->block[i].eob_max_offset_16x16, + x->block[i].zbin_16x16, + x->block[i].round, x->block[0].quant, x->block[i].quant_shift, + x->e_mbd.sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, + x->e_mbd.sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64, + x->e_mbd.block[i].dequant, + x->block[i].zbin_extra, + &x->e_mbd.block[i].eob, + vp9_default_zig_zag1d_16x16, 1); +} +#endif + /* quantize_b_pair function pointer in 
MACROBLOCK structure is set to one of * these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair @@ -427,6 +472,74 @@ void vp9_init_quantizer(VP9_COMP *cpi) { 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, }; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + static const int zbin_boost_32x32[1024] = { + 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, + 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 
48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + }; +#endif int qrounding_factor = 48; @@ -454,7 +567,13 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; cpi->zrun_zbin_boost_y1_8x8[Q][0] = ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; - cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; + cpi->zrun_zbin_boost_y1_16x16[Q][0] = + ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + cpi->Y1zbin_32x32[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; + cpi->zrun_zbin_boost_y1_32x32[Q][0] = + ((quant_val * zbin_boost_32x32[0]) + 64) >> 7; +#endif quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q); @@ -468,7 +587,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; cpi->zrun_zbin_boost_y2_8x8[Q][0] = ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; - cpi->zrun_zbin_boost_y2_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; + cpi->zrun_zbin_boost_y2_16x16[Q][0] = + ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q); invert_quant(cpi->UVquant[Q] + 0, @@ -481,7 +601,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; cpi->zrun_zbin_boost_uv_8x8[Q][0] = ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; - cpi->zrun_zbin_boost_uv_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; + cpi->zrun_zbin_boost_uv_16x16[Q][0] = + ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; // all the 4x4 ac values =; for (i = 1; i < 16; i++) { @@ -543,16 +664,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) { quant_val = vp9_ac_yquant(Q); cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_y1_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; + cpi->zrun_zbin_boost_y1_16x16[Q][i] = + ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q); cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_y2_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; + cpi->zrun_zbin_boost_y2_16x16[Q][i] = + ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q); cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; - cpi->zrun_zbin_boost_uv_16x16[Q][i] = ((quant_val * 
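The 1,024-entry zbin_boost_32x32 table above is almost entirely its saturation value: only the first 25 entries ramp (three zeros, three eights, then 10, 12, 14, ..., reaching 48 at index 25 and staying there). A hypothetical generator that reproduces the table, useful for sanity-checking it against the listing:

/* Rebuild zbin_boost_32x32: 0,0,0,8,8,8 then 10 + 2*(i - 6),
 * saturating at 48 from index 25 onward. */
static void make_zbin_boost_32x32(int boost[1024]) {
  int i;
  for (i = 0; i < 1024; i++) {
    if (i < 3)
      boost[i] = 0;
    else if (i < 6)
      boost[i] = 8;
    else if (10 + 2 * (i - 6) < 48)
      boost[i] = 10 + 2 * (i - 6);
    else
      boost[i] = 48;
  }
}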
zbin_boost_16x16[i]) + 64) >> 7; + cpi->zrun_zbin_boost_uv_16x16[Q][i] = + ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + // 32x32 structures. Same comment above applies. + for (i = 1; i < 1024; i++) { + int rc = vp9_default_zig_zag1d_32x32[i]; + + quant_val = vp9_ac_yquant(Q); + cpi->Y1zbin_32x32[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7; + cpi->zrun_zbin_boost_y1_32x32[Q][i] = + ((quant_val * zbin_boost_32x32[i]) + 64) >> 7; + } +#endif } } @@ -592,11 +727,17 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex]; x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + x->block[i].zbin_32x32 = cpi->Y1zbin_32x32[QIndex]; +#endif x->block[i].round = cpi->Y1round[QIndex]; x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex]; x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex]; +#endif x->block[i].zbin_extra = (short)zbin_extra; // Segment max eob offset feature. @@ -607,10 +748,17 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); x->block[i].eob_max_offset_16x16 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + x->block[i].eob_max_offset_32x32 = + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); +#endif } else { x->block[i].eob_max_offset = 16; x->block[i].eob_max_offset_8x8 = 64; x->block[i].eob_max_offset_16x16 = 256; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + x->block[i].eob_max_offset_32x32 = 1024; +#endif } } @@ -640,9 +788,12 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); x->block[i].eob_max_offset_8x8 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); + x->block[i].eob_max_offset_16x16 = + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); } else { x->block[i].eob_max_offset = 16; x->block[i].eob_max_offset_8x8 = 64; + x->block[i].eob_max_offset_16x16 = 256; } } diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index dd11e75ba..832a486f5 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -78,6 +78,11 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x); extern prototype_quantize_block(vp9_quantize_quantb_16x16); extern prototype_quantize_mb(vp9_quantize_mby_16x16); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +void vp9_quantize_sby_32x32(MACROBLOCK *x); +void vp9_quantize_sbuv_16x16(MACROBLOCK *x); +#endif + struct VP9_COMP; extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index b589243e8..c896e41b1 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -175,6 +175,9 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8); vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32); +#endif vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); #if CONFIG_COMP_INTERINTRA_PRED cc->interintra_prob = 
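The segment plumbing in vp9_mb_init_quantizer above is uniform: when SEG_LVL_EOB is active for the macroblock's segment, one segment value caps the coded coefficient count at every transform size; otherwise each size falls back to its full count (16, 64, 256, and now 1024 for 32x32). A small sketch of that selection, with hypothetical names:

/* Per-transform-size EOB caps, indexed 4x4, 8x8, 16x16, 32x32. */
static void set_eob_caps(int seg_eob_active, int seg_eob, int caps[4]) {
  static const int full_count[4] = { 16, 64, 256, 1024 };
  int i;
  for (i = 0; i < 4; i++)
    caps[i] = seg_eob_active ? seg_eob : full_count[i];
}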
cm->fc.interintra_prob; @@ -234,6 +237,9 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8); vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32); +#endif vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); #if CONFIG_COMP_INTERINTRA_PRED cm->fc.interintra_prob = cc->interintra_prob; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9cea18969..60f14f8fe 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -400,12 +400,18 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + fill_token_costs( + cpi->mb.token_costs[TX_32X32], + (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_32x32, + BLOCK_TYPES_32X32); +#endif + /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; vp9_init_mode_costs(cpi); - if (cpi->common.frame_type != KEY_FRAME) - { + if (cpi->common.frame_type != KEY_FRAME) { vp9_build_nmv_cost_table( cpi->mb.nmvjointcost, cpi->mb.e_mbd.allow_high_precision_mv ? @@ -556,7 +562,7 @@ static int cost_coeffs_2x2(MACROBLOCK *mb, static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size) { + TX_SIZE tx_size) { const int eob = b->eob; int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */ int cost = 0, default_eob, seg_eob; @@ -613,9 +619,24 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, default_eob = 256; if (type == PLANE_TYPE_Y_WITH_DC) { tx_type = get_tx_type_16x16(xd, b); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (type == PLANE_TYPE_UV) { + int ib = (int)(b - xd->block) - 16; + + qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * ib; +#endif } break; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + case TX_32X32: + scan = vp9_default_zig_zag1d_32x32; + band = vp9_coef_bands_32x32; + default_eob = 1024; + qcoeff_ptr = xd->sb_coeff_data.qcoeff; + break; +#endif default: + abort(); break; } if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB)) @@ -813,23 +834,28 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, } static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, - int r[2][TX_SIZE_MAX], int *rate, - int d[TX_SIZE_MAX], int *distortion, - int s[TX_SIZE_MAX], int *skip, - int64_t txfm_cache[NB_TXFM_MODES]) { + int (*r)[2], int *rate, + int *d, int *distortion, + int *s, int *skip, + int64_t txfm_cache[NB_TXFM_MODES], + TX_SIZE max_txfm_size) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_prob skip_prob = cm->mb_no_coeff_skip ? 
vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128; - int64_t rd[2][TX_SIZE_MAX]; - int n; + int64_t rd[TX_SIZE_MAX_SB][2]; + int n, m; - r[1][TX_16X16] = r[0][TX_16X16] + vp9_cost_one(cm->prob_tx[0]) + - vp9_cost_one(cm->prob_tx[1]); - r[1][TX_8X8] = r[0][TX_8X8] + vp9_cost_one(cm->prob_tx[0]) + - vp9_cost_zero(cm->prob_tx[1]); - r[1][TX_4X4] = r[0][TX_4X4] + vp9_cost_zero(cm->prob_tx[0]); + for (n = TX_4X4; n <= max_txfm_size; n++) { + r[n][1] = r[n][0]; + for (m = 0; m <= n - (n == max_txfm_size); m++) { + if (m == n) + r[n][1] += vp9_cost_zero(cm->prob_tx[m]); + else + r[n][1] += vp9_cost_one(cm->prob_tx[m]); + } + } if (cm->mb_no_coeff_skip) { int s0, s1; @@ -838,64 +864,82 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); - for (n = TX_4X4; n <= TX_16X16; n++) { + for (n = TX_4X4; n <= max_txfm_size; n++) { if (s[n]) { - rd[0][n] = rd[1][n] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); + rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); } else { - rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n] + s0, d[n]); - rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n] + s0, d[n]); + rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); + rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } } } else { - for (n = TX_4X4; n <= TX_16X16; n++) { - rd[0][n] = RDCOST(x->rdmult, x->rddiv, r[0][n], d[n]); - rd[1][n] = RDCOST(x->rdmult, x->rddiv, r[1][n], d[n]); + for (n = TX_4X4; n <= max_txfm_size; n++) { + rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]); + rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]); } } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + if (max_txfm_size == TX_32X32 && + (cm->txfm_mode == ALLOW_32X32 || + (cm->txfm_mode == TX_MODE_SELECT && + rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && + rd[TX_32X32][1] < rd[TX_4X4][1]))) { + mbmi->txfm_size = TX_32X32; + } else +#endif if ( cm->txfm_mode == ALLOW_16X16 || +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) || +#endif (cm->txfm_mode == TX_MODE_SELECT && - rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4])) { + rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode == ALLOW_8X8 || - (cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_8X8] < rd[1][TX_4X4])) { + (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_8X8; } else { - assert(cm->txfm_mode == ONLY_4X4 || - (cm->txfm_mode == TX_MODE_SELECT && rd[1][TX_4X4] <= rd[1][TX_8X8])); + assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT); mbmi->txfm_size = TX_4X4; } *distortion = d[mbmi->txfm_size]; - *rate = r[cm->txfm_mode == TX_MODE_SELECT][mbmi->txfm_size]; + *rate = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT]; *skip = s[mbmi->txfm_size]; - txfm_cache[ONLY_4X4] = rd[0][TX_4X4]; - txfm_cache[ALLOW_8X8] = rd[0][TX_8X8]; - txfm_cache[ALLOW_16X16] = rd[0][TX_16X16]; - if (rd[1][TX_16X16] < rd[1][TX_8X8] && rd[1][TX_16X16] < rd[1][TX_4X4]) - txfm_cache[TX_MODE_SELECT] = rd[1][TX_16X16]; + txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; + txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; + txfm_cache[ALLOW_16X16] = rd[TX_16X16][0]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0]; + if (max_txfm_size == TX_32X32 && + rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && + rd[TX_32X32][1] < rd[TX_4X4][1]) + 
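The new rate loop above replaces three hard-coded additions with a truncated unary code over cm->prob_tx[]: signaling transform size n costs a one-bit for every smaller size plus a terminating zero-bit, except that the largest available size needs no terminator. Reduced to a helper (cost_one/cost_zero stand in for vp9_cost_one/vp9_cost_zero evaluated on prob_tx):

/* Rate of signaling tx size n when sizes 0..max_n are available. */
static int tx_size_signal_rate(const int *cost_one, const int *cost_zero,
                               int n, int max_n) {
  int m, rate = 0;
  for (m = 0; m <= n - (n == max_n); m++)
    rate += (m == n) ? cost_zero[m] : cost_one[m];
  return rate;
}

The selection that follows then simply compares the resulting per-size RDCOST values, which is why the r/rd arrays could be re-indexed as [size][with/without signaling] and the function generalized to any max_txfm_size.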
txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; else - txfm_cache[TX_MODE_SELECT] = rd[1][TX_4X4] < rd[1][TX_8X8] ? - rd[1][TX_4X4] : rd[1][TX_8X8]; +#endif + if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) + txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; + else + txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? + rd[TX_4X4][1] : rd[TX_8X8][1]; } static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int *skippable, int64_t txfm_cache[NB_TXFM_MODES]) { MACROBLOCKD *const xd = &x->e_mbd; - int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX]; + int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB]; vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor, x->block[0].src_stride); - macro_block_yrd_16x16(x, &r[0][TX_16X16], &d[TX_16X16], - &s[TX_16X16], 1); - macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8], &s[TX_8X8], 1); - macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4], &s[TX_4X4], 1); + macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1); + macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1); + macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable, - txfm_cache); + txfm_cache, TX_16X16); } static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { @@ -908,25 +952,91 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { } #if CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 +static int rdcost_sby_32x32(MACROBLOCK *x) { + MACROBLOCKD * const xd = &x->e_mbd; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above, + *tl = (ENTROPY_CONTEXT *) &t_left; + + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); +} + +static int vp9_sb_block_error_c(short *coeff, short *dqcoeff, int block_size) { + int i; + int64_t error = 0; + + for (i = 0; i < block_size; i++) { + unsigned int this_diff = coeff[i] - dqcoeff[i]; + error += this_diff * this_diff; + } + + return error > INT_MAX ? 
INT_MAX : error; +} + +#define DEBUG_ERROR 0 +static void super_block_yrd_32x32(MACROBLOCK *x, + int *rate, int *distortion, int *skippable) { + SUPERBLOCK * const x_sb = &x->sb_coeff_data; + MACROBLOCKD * const xd = &x->e_mbd; + SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; +#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID + short out[1024]; +#endif + + vp9_transform_sby_32x32(x); + vp9_quantize_sby_32x32(x); +#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID + vp9_short_idct32x32(xd_sb->dqcoeff, out, 64); +#endif + +#if !CONFIG_DWT32X32HYBRID + *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024); +#else + *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4; +#endif +#if DEBUG_ERROR + printf("IDCT/FDCT error 32x32: %d (d: %d)\n", + vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion); +#endif + *rate = rdcost_sby_32x32(x); + *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd); +} +#endif + static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int *skip, int64_t txfm_cache[NB_TXFM_MODES]) { MACROBLOCKD *const xd = &x->e_mbd; - int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX], n; + int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n; const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - ENTROPY_CONTEXT_PLANES t_above[3][2], *orig_above = xd->above_context; - ENTROPY_CONTEXT_PLANES t_left[3][2], *orig_left = xd->left_context; + ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2], + *orig_above = xd->above_context; + ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2], + *orig_left = xd->left_context; - for (n = TX_4X4; n <= TX_16X16; n++) { + for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) { vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n])); vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n])); - r[0][n] = 0; + r[n][0] = 0; d[n] = 0; s[n] = 1; } +#if CONFIG_TX32X32 + vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, + dst, dst_y_stride); + super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); +#endif + +#if DEBUG_ERROR + int err[3] = { 0, 0, 0 }; +#endif for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; int r_tmp, d_tmp, s_tmp; @@ -941,25 +1051,42 @@ static void super_block_yrd(VP9_COMP *cpi, xd->left_context = &t_left[TX_16X16][y_idx]; macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_16X16] += d_tmp; - r[0][TX_16X16] += r_tmp; + r[TX_16X16][0] += r_tmp; s[TX_16X16] = s[TX_16X16] && s_tmp; +#if DEBUG_ERROR + vp9_inverse_transform_mby_16x16(xd); + err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256); +#endif xd->above_context = &t_above[TX_4X4][x_idx]; xd->left_context = &t_left[TX_4X4][y_idx]; macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_4X4] += d_tmp; - r[0][TX_4X4] += r_tmp; + r[TX_4X4][0] += r_tmp; s[TX_4X4] = s[TX_4X4] && s_tmp; +#if DEBUG_ERROR + vp9_inverse_transform_mby_4x4(xd); + err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256); +#endif xd->above_context = &t_above[TX_8X8][x_idx]; xd->left_context = &t_left[TX_8X8][y_idx]; macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_8X8] += d_tmp; - r[0][TX_8X8] += r_tmp; + r[TX_8X8][0] += r_tmp; s[TX_8X8] = s[TX_8X8] && s_tmp; +#if DEBUG_ERROR + vp9_inverse_transform_mby_8x8(xd); + err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256); +#endif } - - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache); +#if DEBUG_ERROR + printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]); + 
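vp9_sb_block_error_c above accumulates squared coefficient error in 64 bits and then clamps to INT_MAX so callers can keep using int distortions. The same measure as a self-contained helper (a sketch using a signed difference; for short-valued inputs it produces the same sums as the patch's unsigned arithmetic):

#include <limits.h>
#include <stdint.h>

/* Sum of squared error between coefficient arrays, saturated. */
static int block_sse(const short *coeff, const short *dqcoeff, int n) {
  int64_t sse = 0;
  int i;
  for (i = 0; i < n; i++) {
    const int d = coeff[i] - dqcoeff[i];
    sse += (int64_t)d * d;
  }
  return sse > INT_MAX ? INT_MAX : (int)sse;
}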
printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]); + printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]); +#endif + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, + TX_SIZE_MAX_SB - 1); xd->above_context = orig_above; xd->left_context = orig_left; @@ -1632,14 +1759,59 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, } #if CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 +static int rd_cost_sbuv_16x16(MACROBLOCK *x) { + int b; + int cost = 0; + MACROBLOCKD *const xd = &x->e_mbd; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta, *tl; + + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *) &t_above; + tl = (ENTROPY_CONTEXT *) &t_left; + + for (b = 16; b < 24; b += 4) + cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV, + ta + vp9_block2above_8x8[b], + tl + vp9_block2left_8x8[b], TX_16X16); + + return cost; +} + +static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate, + int *distortion, int *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + + vp9_transform_sbuv_16x16(x); + vp9_quantize_sbuv_16x16(x); + + *rate = rd_cost_sbuv_16x16(x); + *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024, + xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2; + *skip = vp9_sbuv_is_skippable_16x16(xd); +} +#endif + static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel, int *skip) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - int n, r = 0, d = 0; const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + +#if CONFIG_TX32X32 + if (mbmi->txfm_size == TX_32X32) { + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + rd_inter32x32_uv_16x16(x, rate, distortion, skip); + } else { +#endif + int n, r = 0, d = 0; int skippable = 1; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT_PLANES *ta = xd->above_context; @@ -1680,8 +1852,11 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, xd->above_context = ta; memcpy(xd->above_context, t_above, sizeof(t_above)); memcpy(xd->left_context, t_left, sizeof(t_left)); +#if CONFIG_TX32X32 + } +#endif - return RDCOST(x->rdmult, x->rddiv, r, d); + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } #endif @@ -1818,15 +1993,26 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, } #if CONFIG_SUPERBLOCKS -static void super_block_uvrd_8x8(MACROBLOCK *x, - int *rate, - int *distortion, - int *skippable) { +// TODO(rbultje) very similar to rd_inter32x32_uv(), merge? 
+static void super_block_uvrd(MACROBLOCK *x, + int *rate, + int *distortion, + int *skippable) { MACROBLOCKD *const xd = &x->e_mbd; - int d = 0, r = 0, n, s = 1; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + +#if CONFIG_TX32X32 + if (mbmi->txfm_size == TX_32X32) { + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + rd_inter32x32_uv_16x16(x, rate, distortion, skippable); + } else { +#endif + int d = 0, r = 0, n, s = 1; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT_PLANES *ta = xd->above_context; ENTROPY_CONTEXT_PLANES *tl = xd->left_context; @@ -1844,9 +2030,15 @@ static void super_block_uvrd_8x8(MACROBLOCK *x, udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, dst_uv_stride); - vp9_transform_mbuv_8x8(x); - vp9_quantize_mbuv_8x8(x); - s &= vp9_mbuv_is_skippable_8x8(xd); + if (mbmi->txfm_size == TX_4X4) { + vp9_transform_mbuv_4x4(x); + vp9_quantize_mbuv_4x4(x); + s &= vp9_mbuv_is_skippable_4x4(xd); + } else { + vp9_transform_mbuv_8x8(x); + vp9_quantize_mbuv_8x8(x); + s &= vp9_mbuv_is_skippable_8x8(xd); + } d += vp9_mbuverror(x) >> 2; xd->above_context = ta + x_idx; @@ -1864,6 +2056,9 @@ static void super_block_uvrd_8x8(MACROBLOCK *x, xd->above_context = ta; memcpy(xd->above_context, t_above, sizeof(t_above)); memcpy(xd->left_context, t_left, sizeof(t_left)); +#if CONFIG_TX32X32 + } +#endif } static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, @@ -1882,8 +2077,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, x->e_mbd.mode_info_context->mbmi.uv_mode = mode; vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - super_block_uvrd_8x8(x, &this_rate_tokenonly, - &this_distortion, &s); + super_block_uvrd(x, &this_rate_tokenonly, + &this_distortion, &s); this_rate = this_rate_tokenonly + x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); @@ -4141,8 +4336,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int y_skip, uv_skip; int64_t txfm_cache[NB_TXFM_MODES]; - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, txfm_cache); error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, @@ -4362,6 +4555,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0; MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV; int switchable_filter_index = 0; +#if CONFIG_TX32X32 + int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0; + int dist_uv_16x16 = 0, uv_skip_16x16 = 0; + MB_PREDICTION_MODE mode_uv_16x16; +#endif x->skip = 0; xd->mode_info_context->mbmi.segment_id = segment_id; @@ -4397,6 +4595,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &dist_uv_8x8, &uv_skip_8x8); mode_uv_8x8 = mbmi->uv_mode; } +#if CONFIG_TX32X32 + if (cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16, + &dist_uv_16x16, &uv_skip_16x16); + mode_uv_16x16 = mbmi->uv_mode; + } +#endif for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { @@ -4524,6 +4730,13 @@ int64_t 
vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion_uv = dist_uv_4x4; skippable = skippable && uv_skip_4x4; mbmi->uv_mode = mode_uv_4x4; +#if CONFIG_TX32X32 + } else if (mbmi->txfm_size == TX_32X32) { + rate_uv = rate_uv_16x16; + distortion_uv = dist_uv_16x16; + skippable = skippable && uv_skip_16x16; + mbmi->uv_mode = mode_uv_16x16; +#endif } else { rate_uv = rate_uv_8x8; distortion_uv = dist_uv_8x8; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 44963b223..a662e048e 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -117,7 +117,7 @@ static void tokenize_b(VP9_COMP *cpi, int dry_run) { int pt; /* near block/prev token context index */ int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; - const int eob = b->eob; /* one beyond last nonzero coeff */ + int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ const short *qcoeff_ptr = b->qcoeff; int seg_eob; @@ -177,7 +177,23 @@ static void tokenize_b(VP9_COMP *cpi, counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; } +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + if (type == PLANE_TYPE_UV) { + int uv_idx = (((int) (b - xd->block)) - 16) >> 2; + qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx; + } +#endif break; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + case TX_32X32: + seg_eob = 1024; + bands = vp9_coef_bands_32x32; + scan = vp9_default_zig_zag1d_32x32; + counts = cpi->coef_counts_32x32; + probs = cpi->common.fc.coef_probs_32x32; + qcoeff_ptr = xd->sb_coeff_data.qcoeff; + break; +#endif } if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) @@ -283,6 +299,79 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { + int skip = 1; + skip &= !xd->block[0].eob; + return skip; +} + +int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) { + return (!xd->block[16].eob) & (!xd->block[20].eob); +} + +static int sb_is_skippable_32x32(MACROBLOCKD *xd) { + return vp9_sby_is_skippable_32x32(xd) && + vp9_sbuv_is_skippable_16x16(xd); +} + +void vp9_tokenize_sb(VP9_COMP *cpi, + MACROBLOCKD *xd, + TOKENEXTRA **t, + int dry_run) { + VP9_COMMON * const cm = &cpi->common; + MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; + TOKENEXTRA *t_backup = *t; + ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0), + (ENTROPY_CONTEXT *) (xd->above_context + 1), }; + ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0), + (ENTROPY_CONTEXT *) (xd->left_context + 1), }; + const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); + const int segment_id = mbmi->segment_id; + const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || + (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0); + int b; + + mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); + + if (mbmi->mb_skip_coeff) { + if (!dry_run) + cpi->skip_true_count[mb_skip_context] += skip_inc; + if (!cm->mb_no_coeff_skip) { + vp9_stuff_sb(cpi, xd, t, dry_run); + } else { + vp9_fix_contexts_sb(xd); + } + if (dry_run) + *t = t_backup; + return; + } + + if (!dry_run) + cpi->skip_false_count[mb_skip_context] += skip_inc; + + tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, + A[0], L[0], TX_32X32, dry_run); + A[0][1] = A[0][2] = A[0][3] = A[0][0]; + L[0][1] = L[0][2] = L[0][3] = L[0][0]; + + for (b = 16; b < 24; b += 4) { + tokenize_b(cpi, xd, 
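Both the tokenizer and the cost code above address one flat superblock coefficient buffer: the 32x32 luma transform fills the first 1024 entries, and the two 16x16 chroma transforms follow at offsets 1024 and 1280 (tokenize_b's 1024 + 256 * uv_idx and the quantizer's 1024 + (i - 16) * 64 describe the same layout). As a hypothetical helper:

/* SB coeff layout: [0,1024) Y 32x32, [1024,1280) U, [1280,1536) V. */
static short *sb_plane_coeffs(short *base, int plane /* 0=Y,1=U,2=V */) {
  return plane == 0 ? base : base + 1024 + 256 * (plane - 1);
}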
xd->block + b, t, PLANE_TYPE_UV, + A[0] + vp9_block2above_8x8[b], L[0] + vp9_block2left_8x8[b], + TX_16X16, dry_run); + A[0][vp9_block2above_8x8[b] + 1] = A[0][vp9_block2above_8x8[b]]; + L[0][vp9_block2left_8x8[b] + 1] = L[0][vp9_block2left_8x8[b]]; + } + vpx_memset(&A[0][8], 0, sizeof(A[0][8])); + vpx_memset(&L[0][8], 0, sizeof(L[0][8])); + vpx_memcpy(A[1], A[0], sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(L[1], L[0], sizeof(ENTROPY_CONTEXT_PLANES)); + + if (dry_run) + *t = t_backup; +} +#endif + void vp9_tokenize_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, @@ -717,6 +806,13 @@ static __inline void stuff_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_16x16; } break; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + case TX_32X32: + bands = vp9_coef_bands_32x32; + counts = cpi->coef_counts_32x32; + probs = cpi->common.fc.coef_probs_32x32; + break; +#endif } band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0]; t->Token = DCT_EOB_TOKEN; @@ -775,7 +871,8 @@ static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, A[1] = A[2] = A[3] = A[0]; L[1] = L[2] = L[3] = L[0]; for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b], + stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, + A + vp9_block2above_8x8[b], L + vp9_block2above_8x8[b], TX_8X8, dry_run); A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; @@ -869,6 +966,43 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run) { + ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0), + (ENTROPY_CONTEXT *) (xd->above_context + 1), }; + ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0), + (ENTROPY_CONTEXT *) (xd->left_context + 1), }; + int b; + + stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, + A[0], L[0], TX_32X32, dry_run); + A[0][1] = A[0][2] = A[0][3] = A[0][0]; + L[0][1] = L[0][2] = L[0][3] = L[0][0]; + for (b = 16; b < 24; b += 4) { + stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, + A[0] + vp9_block2above_8x8[b], + L[0] + vp9_block2above_8x8[b], TX_16X16, dry_run); + A[0][vp9_block2above_8x8[b] + 1] = A[0][vp9_block2above_8x8[b]]; + L[0][vp9_block2left_8x8[b] + 1] = L[0][vp9_block2left_8x8[b]]; + } + vpx_memset(&A[0][8], 0, sizeof(A[0][8])); + vpx_memset(&L[0][8], 0, sizeof(L[0][8])); + vpx_memcpy(A[1], A[0], sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(L[1], L[0], sizeof(ENTROPY_CONTEXT_PLANES)); +} + +void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { + TOKENEXTRA * const t_backup = *t; + + stuff_sb_32x32(cpi, xd, t, dry_run); + + if (dry_run) { + *t = t_backup; + } +} +#endif + void vp9_fix_contexts(MACROBLOCKD *xd) { /* Clear entropy contexts for blocks */ if ((xd->mode_info_context->mbmi.mode != B_PRED @@ -885,3 +1019,10 @@ void vp9_fix_contexts(MACROBLOCKD *xd) { xd->left_context->y2 = 1; } } + +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +void vp9_fix_contexts_sb(MACROBLOCKD *xd) { + vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); +} +#endif diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 868909be3..cfd5db694 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -34,16 +34,29 @@ extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); extern int 
vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block); extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); +extern int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd); +#endif struct VP9_COMP; extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); +extern void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +extern void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); +#endif extern void vp9_fix_contexts(MACROBLOCKD *xd); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +extern void vp9_fix_contexts_sb(MACROBLOCKD *xd); +#endif #ifdef ENTROPY_STATS void init_context_counters(); From 885cf816ebb85ad795d7a21c15103e734eaa1bcd Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 7 Dec 2012 16:09:59 -0800 Subject: [PATCH 10/77] Introduce vp9_coeff_probs/counts/stats/accum types. Use these, instead of the 4/5-dimensional arrays, to hold statistics, counts, accumulations and probabilities for coefficient tokens. This commit also re-allows ENTROPY_STATS to compile. Change-Id: If441ffac936f52a3af91d8f2922ea8a0ceabdaa5 --- vp9/common/vp9_default_coef_probs.h | 35 +-- vp9/common/vp9_entropy.c | 218 ++++----------- vp9/common/vp9_entropy.h | 9 +- vp9/common/vp9_onyxc_int.h | 62 ++--- vp9/decoder/vp9_decodframe.c | 36 +-- vp9/decoder/vp9_detokenize.c | 47 ++-- vp9/decoder/vp9_onyxd_int.h | 2 +- vp9/encoder/vp9_bitstream.c | 410 +++++++++++----------------- vp9/encoder/vp9_block.h | 6 +- vp9/encoder/vp9_encodeframe.c | 4 +- vp9/encoder/vp9_mcomp.h | 1 + vp9/encoder/vp9_onyx_if.c | 12 +- vp9/encoder/vp9_onyx_int.h | 68 ++--- vp9/encoder/vp9_ratectrl.c | 8 +- vp9/encoder/vp9_rdopt.c | 52 ++-- vp9/encoder/vp9_tokenize.c | 318 +++++++++------------ vp9/encoder/vp9_tokenize.h | 17 +- 17 files changed, 509 insertions(+), 796 deletions(-) diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 31103adb7..1255fce5e 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -12,10 +12,7 @@ /*Generated file, included by vp9_entropy.c*/ -static const vp9_prob default_coef_probs [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = { +static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES_4X4] = { { /* Block Type ( 0 ) */ { @@ -254,10 +251,7 @@ static const vp9_prob default_coef_probs [BLOCK_TYPES] } }; -static const vp9_prob default_hybrid_coef_probs [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = { +static const vp9_coeff_probs default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4] = { { /* Block Type ( 0 ) */ { @@ -496,11 +490,7 @@ static const vp9_prob default_hybrid_coef_probs [BLOCK_TYPES] } }; -static const vp9_prob -default_coef_probs_8x8[BLOCK_TYPES_8X8] -[COEF_BANDS] -[PREV_COEF_CONTEXTS] -[ENTROPY_NODES] = { +static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES_8X8] = { { /* block Type 0 */ { @@ -729,11 +719,7 @@ default_coef_probs_8x8[BLOCK_TYPES_8X8] } }; -static const vp9_prob -default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = { +static const vp9_coeff_probs default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] = { { /* 
block Type 0 */ { @@ -962,11 +948,7 @@ default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] } }; -static const vp9_prob - default_coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = { +static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES_16X16] = { { /* block Type 0 */ { /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, @@ -1169,11 +1151,8 @@ static const vp9_prob } }; -static const vp9_prob - default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = { +static const vp9_coeff_probs + default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] = { { /* block Type 0 */ { /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 321fa8c57..9d8e924d5 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -307,10 +307,10 @@ vp9_extra_bit_struct vp9_extra_bits[12] = { #include "vp9/common/vp9_default_coef_probs.h" void vp9_default_coef_probs(VP9_COMMON *pc) { - vpx_memcpy(pc->fc.coef_probs, default_coef_probs, - sizeof(pc->fc.coef_probs)); - vpx_memcpy(pc->fc.hybrid_coef_probs, default_hybrid_coef_probs, - sizeof(pc->fc.hybrid_coef_probs)); + vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4, + sizeof(pc->fc.coef_probs_4x4)); + vpx_memcpy(pc->fc.hybrid_coef_probs_4x4, default_hybrid_coef_probs_4x4, + sizeof(pc->fc.hybrid_coef_probs_4x4)); vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8, sizeof(pc->fc.coef_probs_8x8)); @@ -343,13 +343,42 @@ void vp9_coef_tree_initialize() { #define COEF_COUNT_SAT_AFTER_KEY 24 #define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -void vp9_adapt_coef_probs(VP9_COMMON *cm) { +static void update_coef_probs(vp9_coeff_probs *dst_coef_probs, + vp9_coeff_probs *pre_coef_probs, + int block_types, vp9_coeff_count *coef_counts, + int count_sat, int update_factor) { int t, i, j, k, count; unsigned int branch_ct[ENTROPY_NODES][2]; vp9_prob coef_probs[ENTROPY_NODES]; - int update_factor; /* denominator 256 */ int factor; + + for (i = 0; i < block_types; ++i) + for (j = 0; j < COEF_BANDS; ++j) + for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { + if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) + continue; + vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS, + vp9_coef_encodings, vp9_coef_tree, + coef_probs, branch_ct, + coef_counts[i][j][k], 256, 1); + for (t = 0; t < ENTROPY_NODES; ++t) { + int prob; + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > count_sat ? 
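vp9_tree_probs_from_distribution, called above, converts raw token counts into binary probabilities: each internal node of the token tree gets the left-subtree count over the total, yielding both the node probability and the branch counts that the adaptation step consumes. At a single node it reduces to the following (a sketch; the library's exact rounding may differ):

/* Probability of taking the 0-branch, kept in the legal [1, 255]. */
static unsigned char node_prob(unsigned int ct0, unsigned int ct1) {
  const unsigned int total = ct0 + ct1;
  unsigned int p = total ? (256 * ct0 + (total >> 1)) / total : 128;
  if (p < 1) p = 1;
  if (p > 255) p = 255;
  return (unsigned char)p;
}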
count_sat : count; + factor = (update_factor * count / count_sat); + prob = ((int)pre_coef_probs[i][j][k][t] * (256 - factor) + + (int)coef_probs[t] * factor + 128) >> 8; + dst_coef_probs[i][j][k][t] = clip_prob(prob); + } + } +} + +void vp9_adapt_coef_probs(VP9_COMMON *cm) { +#ifdef COEF_COUNT_TESTING + int t, i, j, k; +#endif int count_sat; + int update_factor; /* denominator 256 */ // printf("Frame type: %d\n", cm->frame_type); if (cm->frame_type == KEY_FRAME) { @@ -422,159 +451,30 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { } #endif - for (i = 0; i < BLOCK_TYPES; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.coef_counts [i][j][k], - 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_coef_probs[i][j][k][t] * (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.coef_probs[i][j][k][t] = 1; - else if (prob > 255) cm->fc.coef_probs[i][j][k][t] = 255; - else cm->fc.coef_probs[i][j][k][t] = prob; - } - } - - for (i = 0; i < BLOCK_TYPES; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.hybrid_coef_counts [i][j][k], - 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_hybrid_coef_probs[i][j][k][t] * (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.hybrid_coef_probs[i][j][k][t] = 1; - else if (prob > 255) cm->fc.hybrid_coef_probs[i][j][k][t] = 255; - else cm->fc.hybrid_coef_probs[i][j][k][t] = prob; - } - } - - for (i = 0; i < BLOCK_TYPES_8X8; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.coef_counts_8x8 [i][j][k], - 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_coef_probs_8x8[i][j][k][t] * (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.coef_probs_8x8[i][j][k][t] = 1; - else if (prob > 255) cm->fc.coef_probs_8x8[i][j][k][t] = 255; - else cm->fc.coef_probs_8x8[i][j][k][t] = prob; - } - } - - for (i = 0; i < BLOCK_TYPES_8X8; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.hybrid_coef_counts_8x8 [i][j][k], - 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? 
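The factored-out update_coef_probs above is the whole of the forward adaptation: the freshly measured node probability is blended into the previous frame's probability with a weight that grows with the number of supporting samples and saturates at count_sat. One node's update, extracted as a helper:

/* Blend pre toward measured; factor/256 is the weight of the new
 * estimate, proportional to the sample count up to count_sat. */
static unsigned char adapt_prob(unsigned char pre, unsigned char measured,
                                unsigned int count, unsigned int count_sat,
                                unsigned int update_factor) {
  int factor, prob;
  if (count > count_sat) count = count_sat;
  factor = (int)(update_factor * count / count_sat);
  prob = ((int)pre * (256 - factor) + (int)measured * factor + 128) >> 8;
  return (unsigned char)(prob < 1 ? 1 : prob > 255 ? 255 : prob);
}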
count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_hybrid_coef_probs_8x8[i][j][k][t] * - (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 1; - else if (prob > 255) cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = 255; - else cm->fc.hybrid_coef_probs_8x8[i][j][k][t] = prob; - } - } - - for (i = 0; i < BLOCK_TYPES_16X16; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.coef_counts_16x16[i][j][k], 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_coef_probs_16x16[i][j][k][t] * - (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.coef_probs_16x16[i][j][k][t] = 1; - else if (prob > 255) cm->fc.coef_probs_16x16[i][j][k][t] = 255; - else cm->fc.coef_probs_16x16[i][j][k][t] = prob; - } - } - - for (i = 0; i < BLOCK_TYPES_16X16; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.hybrid_coef_counts_16x16[i][j][k], 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_hybrid_coef_probs_16x16[i][j][k][t] * (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 1; - else if (prob > 255) cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = 255; - else cm->fc.hybrid_coef_probs_16x16[i][j][k][t] = prob; - } - } - + update_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4, + BLOCK_TYPES_4X4, cm->fc.coef_counts_4x4, + count_sat, update_factor); + update_coef_probs(cm->fc.hybrid_coef_probs_4x4, + cm->fc.pre_hybrid_coef_probs_4x4, + BLOCK_TYPES_4X4, cm->fc.hybrid_coef_counts_4x4, + count_sat, update_factor); + update_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, + BLOCK_TYPES_8X8, cm->fc.coef_counts_8x8, + count_sat, update_factor); + update_coef_probs(cm->fc.hybrid_coef_probs_8x8, + cm->fc.pre_hybrid_coef_probs_8x8, + BLOCK_TYPES_8X8, cm->fc.hybrid_coef_counts_8x8, + count_sat, update_factor); + update_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, + BLOCK_TYPES_16X16, cm->fc.coef_counts_16x16, + count_sat, update_factor); + update_coef_probs(cm->fc.hybrid_coef_probs_16x16, + cm->fc.pre_hybrid_coef_probs_16x16, + BLOCK_TYPES_16X16, cm->fc.hybrid_coef_counts_16x16, + count_sat, update_factor); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - for (i = 0; i < BLOCK_TYPES_32X32; ++i) - for (j = 0; j < COEF_BANDS; ++j) - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, cm->fc.coef_counts_32x32[i][j][k], 256, 1); - for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; - count = branch_ct[t][0] + 
branch_ct[t][1]; - count = count > count_sat ? count_sat : count; - factor = (update_factor * count / count_sat); - prob = ((int)cm->fc.pre_coef_probs_32x32[i][j][k][t] * - (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - if (prob <= 0) cm->fc.coef_probs_32x32[i][j][k][t] = 1; - else if (prob > 255) cm->fc.coef_probs_32x32[i][j][k][t] = 255; - else cm->fc.coef_probs_32x32[i][j][k][t] = prob; - } - } + update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, + BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32, + count_sat, update_factor); #endif } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 96d964448..6ec044606 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -60,7 +60,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ /* Coefficients are predicted via a 3-dimensional probability table. */ /* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ -#define BLOCK_TYPES 4 +#define BLOCK_TYPES_4X4 4 #define BLOCK_TYPES_8X8 4 @@ -100,6 +100,13 @@ extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]); /*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ #define PREV_COEF_CONTEXTS 4 +typedef unsigned int vp9_coeff_count[COEF_BANDS][PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS]; +typedef unsigned int vp9_coeff_stats[COEF_BANDS][PREV_COEF_CONTEXTS] + [ENTROPY_NODES][2]; +typedef vp9_prob vp9_coeff_probs[COEF_BANDS][PREV_COEF_CONTEXTS] + [ENTROPY_NODES]; + #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index d80498df1..cc0878c9e 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -52,14 +52,14 @@ typedef struct frame_contexts { vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; - vp9_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - vp9_prob coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; #endif nmv_context nmvc; @@ -83,44 +83,24 @@ typedef struct frame_contexts { unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS]; unsigned int mbsplit_counts[VP9_NUMMBSPLITS]; - vp9_prob pre_coef_probs [BLOCK_TYPES] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob pre_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - - vp9_prob pre_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] - [PREV_COEF_CONTEXTS] 
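The new typedefs fix the inner three dimensions (8 coefficient bands, 4 previous-token contexts, 11 tree nodes or 12 tokens), so every table becomes an array of them and differs only in its outer block-type count. That is what lets update_coef_probs and read_coef_probs_common take a table pointer plus a length instead of six near-identical loops. Restated outside the tree, with local stand-in names:

#define COEF_BANDS 8
#define PREV_COEF_CONTEXTS 4
#define ENTROPY_NODES 11
#define MAX_ENTROPY_TOKENS 12

typedef unsigned char vp9_prob_t;  /* stand-in for vp9_prob */
typedef vp9_prob_t coeff_probs_t[COEF_BANDS][PREV_COEF_CONTEXTS]
                                [ENTROPY_NODES];
typedef unsigned int coeff_count_t[COEF_BANDS][PREV_COEF_CONTEXTS]
                                  [MAX_ENTROPY_TOKENS];

/* One routine can now walk a table of any block-type count: */
static unsigned long total_tokens(const coeff_count_t *counts,
                                  int block_types) {
  unsigned long sum = 0;
  int i, j, k, t;
  for (i = 0; i < block_types; i++)
    for (j = 0; j < COEF_BANDS; j++)
      for (k = 0; k < PREV_COEF_CONTEXTS; k++)
        for (t = 0; t < MAX_ENTROPY_TOKENS; t++)
          sum += counts[i][j][k][t];
  return sum;
}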
[ENTROPY_NODES]; - vp9_prob pre_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - - vp9_prob pre_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - vp9_prob pre_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - + vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs pre_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs pre_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs pre_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - vp9_prob pre_coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32]; #endif - unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - - unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - - unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - + vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - unsigned int coef_counts_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] - [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; + vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; #endif nmv_context_counts NMVcount; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 7f851a18a..ffdf9f371 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -1231,14 +1231,13 @@ static void init_frame(VP9D_COMP *pbi) { } -static void read_coef_probs_common( - BOOL_DECODER* const bc, - vp9_prob coef_probs[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES]) { +static void read_coef_probs_common(BOOL_DECODER* const bc, + vp9_coeff_probs *coef_probs, + int block_types) { int i, j, k, l; if (vp9_read_bit(bc)) { - for (i = 0; i < BLOCK_TYPES; i++) { + for (i = 0; i < block_types; i++) { for (j = !i; j < COEF_BANDS; j++) { /* NB: This j loop starts from 1 on block type i == 0 */ for (k = 0; k < PREV_COEF_CONTEXTS; k++) { @@ -1261,20 +1260,21 @@ static void read_coef_probs_common( static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { VP9_COMMON *const pc = &pbi->common; - read_coef_probs_common(bc, pc->fc.coef_probs); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs); + read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES_4X4); + read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4); if (pbi->common.txfm_mode != ONLY_4X4) { - read_coef_probs_common(bc, pc->fc.coef_probs_8x8); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8); + read_coef_probs_common(bc, pc->fc.coef_probs_8x8, 
BLOCK_TYPES_8X8); + read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8); } if (pbi->common.txfm_mode > ALLOW_8X8) { - read_coef_probs_common(bc, pc->fc.coef_probs_16x16); - read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16); + read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES_16X16); + read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16, + BLOCK_TYPES_16X16); } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS if (pbi->common.txfm_mode > ALLOW_16X16) { - read_coef_probs_common(bc, pc->fc.coef_probs_32x32); + read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32); } #endif } @@ -1619,10 +1619,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { fclose(z); } - vp9_copy(pbi->common.fc.pre_coef_probs, - pbi->common.fc.coef_probs); - vp9_copy(pbi->common.fc.pre_hybrid_coef_probs, - pbi->common.fc.hybrid_coef_probs); + vp9_copy(pbi->common.fc.pre_coef_probs_4x4, + pbi->common.fc.coef_probs_4x4); + vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_4x4, + pbi->common.fc.hybrid_coef_probs_4x4); vp9_copy(pbi->common.fc.pre_coef_probs_8x8, pbi->common.fc.coef_probs_8x8); vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_8x8, @@ -1648,8 +1648,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pbi->common.fc.pre_interintra_prob = pbi->common.fc.interintra_prob; #endif pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc; - vp9_zero(pbi->common.fc.coef_counts); - vp9_zero(pbi->common.fc.hybrid_coef_counts); + vp9_zero(pbi->common.fc.coef_counts_4x4); + vp9_zero(pbi->common.fc.hybrid_coef_counts_4x4); vp9_zero(pbi->common.fc.coef_counts_8x8); vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8); vp9_zero(pbi->common.fc.coef_counts_16x16); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 35a26477a..46ccf627f 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -100,10 +100,10 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { return v; } -#define INCREMENT_COUNT(token) \ - do { \ - coef_counts[coef_bands[c]][pt][token]++; \ - pt = vp9_prev_token_class[token]; \ +#define INCREMENT_COUNT(token) \ + do { \ + coef_counts[type][coef_bands[c]][pt][token]++; \ + pt = vp9_prev_token_class[token]; \ } while (0) #define WRITE_COEF_CONTINUE(val, token) \ @@ -130,42 +130,43 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, const int *coef_bands) { FRAME_CONTEXT *const fc = &dx->common.fc; int pt, c = (type == PLANE_TYPE_Y_NO_DC); - vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][ENTROPY_NODES], *prob; - unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + vp9_coeff_probs *coef_probs; + vp9_prob *prob; + vp9_coeff_count *coef_counts; switch (txfm_size) { default: case TX_4X4: if (tx_type == DCT_DCT) { - coef_probs = fc->coef_probs[type]; - coef_counts = fc->coef_counts[type]; + coef_probs = fc->coef_probs_4x4; + coef_counts = fc->coef_counts_4x4; } else { - coef_probs = fc->hybrid_coef_probs[type]; - coef_counts = fc->hybrid_coef_counts[type]; + coef_probs = fc->hybrid_coef_probs_4x4; + coef_counts = fc->hybrid_coef_counts_4x4; } break; case TX_8X8: if (tx_type == DCT_DCT) { - coef_probs = fc->coef_probs_8x8[type]; - coef_counts = fc->coef_counts_8x8[type]; + coef_probs = fc->coef_probs_8x8; + coef_counts = fc->coef_counts_8x8; } else { - coef_probs = fc->hybrid_coef_probs_8x8[type]; - coef_counts = fc->hybrid_coef_counts_8x8[type]; + coef_probs = fc->hybrid_coef_probs_8x8; + coef_counts = fc->hybrid_coef_counts_8x8; } break; case 
TX_16X16: if (tx_type == DCT_DCT) { - coef_probs = fc->coef_probs_16x16[type]; - coef_counts = fc->coef_counts_16x16[type]; + coef_probs = fc->coef_probs_16x16; + coef_counts = fc->coef_counts_16x16; } else { - coef_probs = fc->hybrid_coef_probs_16x16[type]; - coef_counts = fc->hybrid_coef_counts_16x16[type]; + coef_probs = fc->hybrid_coef_probs_16x16; + coef_counts = fc->hybrid_coef_counts_16x16; } break; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS case TX_32X32: - coef_probs = fc->coef_probs_32x32[type]; - coef_counts = fc->coef_counts_32x32[type]; + coef_probs = fc->coef_probs_32x32; + coef_counts = fc->coef_counts_32x32; break; #endif } @@ -175,7 +176,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, int val; const uint8_t *cat6 = cat6_prob; if (c >= seg_eob) break; - prob = coef_probs[coef_bands[c]][pt]; + prob = coef_probs[type][coef_bands[c]][pt]; if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; SKIP_START: @@ -183,7 +184,7 @@ SKIP_START: if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[coef_bands[c]][pt]; + prob = coef_probs[type][coef_bands[c]][pt]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ @@ -247,7 +248,7 @@ SKIP_START: } if (c < seg_eob) - coef_counts[coef_bands[c]][pt][DCT_EOB_TOKEN]++; + coef_counts[type][coef_bands[c]][pt][DCT_EOB_TOKEN]++; a[0] = l[0] = (c > !type); diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index e4f3228c7..471442944 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -46,7 +46,7 @@ typedef struct { INT16 *qcoeff_start_ptr; - vp9_prob const *coef_probs[BLOCK_TYPES]; + vp9_prob const *coef_probs_4x4[BLOCK_TYPES_4X4]; vp9_prob const *coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_prob const *coef_probs_16X16[BLOCK_TYPES_16X16]; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 847815f50..7cef1ae15 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -39,30 +39,13 @@ unsigned __int64 Sectionbits[500]; int intra_mode_stats[VP9_KF_BINTRAMODES] [VP9_KF_BINTRAMODES] [VP9_KF_BINTRAMODES]; -unsigned int tree_update_hist [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES][2]; -unsigned int hybrid_tree_update_hist [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES][2]; -unsigned int tree_update_hist_8x8 [BLOCK_TYPES_8X8] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; -unsigned int hybrid_tree_update_hist_8x8 [BLOCK_TYPES_8X8] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; -unsigned int tree_update_hist_16x16 [BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; -unsigned int hybrid_tree_update_hist_16x16 [BLOCK_TYPES_16X16] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] [2]; +vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4]; +vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4]; +vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8]; +vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8]; +vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16]; +vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16]; +vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; extern unsigned int active_section; #endif @@ -982,9 +965,9 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); -#ifdef ENTROPY_STATS - accum_mv_refs(mode, ct); -#endif +// #ifdef ENTROPY_STATS +// accum_mv_refs(mode, ct); +// 
#endif } #ifdef ENTROPY_STATS @@ -1419,13 +1402,12 @@ static void write_kfmodes(VP9_COMP* const cpi, vp9_writer* const bc) { /* This function is used for debugging probability trees. */ -static void print_prob_tree(vp9_prob - coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]) { +static void print_prob_tree(vp9_coeff_probs *coef_probs) { /* print coef probability tree */ int i, j, k, l; FILE *f = fopen("enc_tree_probs.txt", "a"); fprintf(f, "{\n"); - for (i = 0; i < BLOCK_TYPES; i++) { + for (i = 0; i < BLOCK_TYPES_4X4; i++) { fprintf(f, " {\n"); for (j = 0; j < COEF_BANDS; j++) { fprintf(f, " {\n"); @@ -1445,176 +1427,95 @@ static void print_prob_tree(vp9_prob fclose(f); } -static void build_coeff_contexts(VP9_COMP *cpi) { +static void build_tree_distribution(vp9_coeff_probs *coef_probs, + vp9_coeff_count *coef_counts, +#ifdef ENTROPY_STATS + VP9_COMP *cpi, + vp9_coeff_accum *context_counters, +#endif + vp9_coeff_stats *coef_branch_ct, + int block_types) { int i = 0, j, k; #ifdef ENTROPY_STATS int t = 0; #endif - for (i = 0; i < BLOCK_TYPES; ++i) { + + for (i = 0; i < block_types; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_coef_probs [i][j][k], - cpi->frame_branch_ct [i][j][k], - cpi->coef_counts [i][j][k], - 256, 1 - ); + vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS, + vp9_coef_encodings, vp9_coef_tree, + coef_probs[i][j][k], + coef_branch_ct[i][j][k], + coef_counts[i][j][k], 256, 1); #ifdef ENTROPY_STATS if (!cpi->dummy_packing) for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters[i][j][k][t] += cpi->coef_counts[i][j][k][t]; -#endif - } - } - } - for (i = 0; i < BLOCK_TYPES; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs [i][j][k], - cpi->frame_hybrid_branch_ct [i][j][k], - cpi->hybrid_coef_counts [i][j][k], - 256, 1 - ); -#ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters[i][j][k][t] += cpi->hybrid_coef_counts[i][j][k][t]; + context_counters[i][j][k][t] += coef_counts[i][j][k][t]; #endif } } } +} - if (cpi->common.txfm_mode != ONLY_4X4) { - for (i = 0; i < BLOCK_TYPES_8X8; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - /* at every context */ - /* calc probs and branch cts for this frame only */ - // vp9_prob new_p [ENTROPY_NODES]; - // unsigned int branch_ct [ENTROPY_NODES] [2]; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_coef_probs_8x8 [i][j][k], - cpi->frame_branch_ct_8x8 [i][j][k], - cpi->coef_counts_8x8 [i][j][k], - 256, 1 - ); +static void build_coeff_contexts(VP9_COMP *cpi) { + build_tree_distribution(cpi->frame_coef_probs_4x4, + cpi->coef_counts_4x4, #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters_8x8[i][j][k][t] += cpi->coef_counts_8x8[i][j][k][t]; + cpi, context_counters_4x4, #endif - } - } - } - for (i = 0; i < BLOCK_TYPES_8X8; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < 
PREV_COEF_CONTEXTS; ++k) { - /* at every context */ - /* calc probs and branch cts for this frame only */ - // vp9_prob new_p [ENTROPY_NODES]; - // unsigned int branch_ct [ENTROPY_NODES] [2]; - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs_8x8 [i][j][k], - cpi->frame_hybrid_branch_ct_8x8 [i][j][k], - cpi->hybrid_coef_counts_8x8 [i][j][k], - 256, 1 - ); + cpi->frame_branch_ct_4x4, BLOCK_TYPES_4X4); + build_tree_distribution(cpi->frame_hybrid_coef_probs_4x4, + cpi->hybrid_coef_counts_4x4, #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters_8x8[i][j][k][t] += cpi->hybrid_coef_counts_8x8[i][j][k][t]; + cpi, hybrid_context_counters_4x4, #endif - } - } - } - } - - if (cpi->common.txfm_mode > ALLOW_8X8) { - for (i = 0; i < BLOCK_TYPES_16X16; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_coef_probs_16x16[i][j][k], - cpi->frame_branch_ct_16x16[i][j][k], - cpi->coef_counts_16x16[i][j][k], 256, 1); + cpi->frame_hybrid_branch_ct_4x4, BLOCK_TYPES_4X4); + build_tree_distribution(cpi->frame_coef_probs_8x8, + cpi->coef_counts_8x8, #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters_16x16[i][j][k][t] += cpi->coef_counts_16x16[i][j][k][t]; + cpi, context_counters_8x8, #endif - } - } - } - for (i = 0; i < BLOCK_TYPES_16X16; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_hybrid_coef_probs_16x16[i][j][k], - cpi->frame_hybrid_branch_ct_16x16[i][j][k], - cpi->hybrid_coef_counts_16x16[i][j][k], 256, 1); + cpi->frame_branch_ct_8x8, BLOCK_TYPES_8X8); + build_tree_distribution(cpi->frame_hybrid_coef_probs_8x8, + cpi->hybrid_coef_counts_8x8, #ifdef ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - hybrid_context_counters_16x16[i][j][k][t] += - cpi->hybrid_coef_counts_16x16[i][j][k][t]; + cpi, hybrid_context_counters_8x8, #endif - } - } - } - } - + cpi->frame_hybrid_branch_ct_8x8, BLOCK_TYPES_8X8); + build_tree_distribution(cpi->frame_coef_probs_16x16, + cpi->coef_counts_16x16, +#ifdef ENTROPY_STATS + cpi, context_counters_16x16, +#endif + cpi->frame_branch_ct_16x16, BLOCK_TYPES_16X16); + build_tree_distribution(cpi->frame_hybrid_coef_probs_16x16, + cpi->hybrid_coef_counts_16x16, +#ifdef ENTROPY_STATS + cpi, hybrid_context_counters_16x16, +#endif + cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (cpi->common.txfm_mode > ALLOW_16X16) { - for (i = 0; i < BLOCK_TYPES_32X32; ++i) { - for (j = 0; j < COEF_BANDS; ++j) { - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0))) - continue; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - cpi->frame_coef_probs_32x32[i][j][k], - cpi->frame_branch_ct_32x32[i][j][k], - cpi->coef_counts_32x32[i][j][k], 256, 1); + build_tree_distribution(cpi->frame_coef_probs_32x32, + cpi->coef_counts_32x32, #ifdef 
ENTROPY_STATS - if (!cpi->dummy_packing) - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - context_counters_32x32[i][j][k][t] += - cpi->coef_counts_32x32[i][j][k][t]; + cpi, context_counters_32x32, #endif - } - } - } - } + cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32); #endif } -static void update_coef_probs_common( - vp9_writer* const bc, - vp9_prob new_frame_coef_probs[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES], - vp9_prob old_frame_coef_probs[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES], - unsigned int frame_branch_ct[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]) { +static void update_coef_probs_common(vp9_writer* const bc, +#ifdef ENTROPY_STATS + VP9_COMP *cpi, + vp9_coeff_stats *tree_update_hist, +#endif + vp9_coeff_probs *new_frame_coef_probs, + vp9_coeff_probs *old_frame_coef_probs, + vp9_coeff_stats *frame_branch_ct, + int block_types) { int i, j, k, t; int update[2] = {0, 0}; int savings; @@ -1622,7 +1523,7 @@ static void update_coef_probs_common( /* dry run to see if there is any udpate at all needed */ savings = 0; - for (i = 0; i < BLOCK_TYPES; ++i) { + for (i = 0; i < block_types; ++i) { for (j = !i; j < COEF_BANDS; ++j) { int prev_coef_savings[ENTROPY_NODES] = {0}; for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -1666,7 +1567,7 @@ static void update_coef_probs_common( vp9_write_bit(bc, 0); } else { vp9_write_bit(bc, 1); - for (i = 0; i < BLOCK_TYPES; ++i) { + for (i = 0; i < block_types; ++i) { for (j = !i; j < COEF_BANDS; ++j) { int prev_coef_savings[ENTROPY_NODES] = {0}; for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { @@ -1696,7 +1597,7 @@ static void update_coef_probs_common( vp9_write(bc, u, upd); #ifdef ENTROPY_STATS if (!cpi->dummy_packing) - ++ tree_update_hist [i][j][k][t] [u]; + ++tree_update_hist[i][j][k][t][u]; #endif if (u) { /* send/use new probability */ @@ -1717,45 +1618,80 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { build_coeff_contexts(cpi); update_coef_probs_common(bc, - cpi->frame_coef_probs, - cpi->common.fc.coef_probs, - cpi->frame_branch_ct); +#ifdef ENTROPY_STATS + cpi, + tree_update_hist_4x4, +#endif + cpi->frame_coef_probs_4x4, + cpi->common.fc.coef_probs_4x4, + cpi->frame_branch_ct_4x4, + BLOCK_TYPES_4X4); update_coef_probs_common(bc, - cpi->frame_hybrid_coef_probs, - cpi->common.fc.hybrid_coef_probs, - cpi->frame_hybrid_branch_ct); +#ifdef ENTROPY_STATS + cpi, + hybrid_tree_update_hist_4x4, +#endif + cpi->frame_hybrid_coef_probs_4x4, + cpi->common.fc.hybrid_coef_probs_4x4, + cpi->frame_hybrid_branch_ct_4x4, + BLOCK_TYPES_4X4); /* do not do this if not even allowed */ if (cpi->common.txfm_mode != ONLY_4X4) { update_coef_probs_common(bc, +#ifdef ENTROPY_STATS + cpi, + tree_update_hist_8x8, +#endif cpi->frame_coef_probs_8x8, cpi->common.fc.coef_probs_8x8, - cpi->frame_branch_ct_8x8); + cpi->frame_branch_ct_8x8, + BLOCK_TYPES_8X8); update_coef_probs_common(bc, +#ifdef ENTROPY_STATS + cpi, + hybrid_tree_update_hist_8x8, +#endif cpi->frame_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8, - cpi->frame_hybrid_branch_ct_8x8); + cpi->frame_hybrid_branch_ct_8x8, + BLOCK_TYPES_8X8); } if (cpi->common.txfm_mode > ALLOW_8X8) { update_coef_probs_common(bc, +#ifdef ENTROPY_STATS + cpi, + tree_update_hist_16x16, +#endif cpi->frame_coef_probs_16x16, cpi->common.fc.coef_probs_16x16, - cpi->frame_branch_ct_16x16); + cpi->frame_branch_ct_16x16, + BLOCK_TYPES_16X16); update_coef_probs_common(bc, +#ifdef ENTROPY_STATS + cpi, + hybrid_tree_update_hist_16x16, +#endif 
cpi->frame_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16, - cpi->frame_hybrid_branch_ct_16x16); + cpi->frame_hybrid_branch_ct_16x16, + BLOCK_TYPES_16X16); } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS if (cpi->common.txfm_mode > ALLOW_16X16) { update_coef_probs_common(bc, +#ifdef ENTROPY_STATS + cpi, + tree_update_hist_32x32, +#endif cpi->frame_coef_probs_32x32, cpi->common.fc.coef_probs_32x32, - cpi->frame_branch_ct_32x32); + cpi->frame_branch_ct_32x32, + BLOCK_TYPES_32X32); } #endif } @@ -2223,12 +2159,18 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_clear_system_state(); // __asm emms; - vp9_copy(cpi->common.fc.pre_coef_probs, cpi->common.fc.coef_probs); - vp9_copy(cpi->common.fc.pre_hybrid_coef_probs, cpi->common.fc.hybrid_coef_probs); - vp9_copy(cpi->common.fc.pre_coef_probs_8x8, cpi->common.fc.coef_probs_8x8); - vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, cpi->common.fc.hybrid_coef_probs_8x8); - vp9_copy(cpi->common.fc.pre_coef_probs_16x16, cpi->common.fc.coef_probs_16x16); - vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16); + vp9_copy(cpi->common.fc.pre_coef_probs_4x4, + cpi->common.fc.coef_probs_4x4); + vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_4x4, + cpi->common.fc.hybrid_coef_probs_4x4); + vp9_copy(cpi->common.fc.pre_coef_probs_8x8, + cpi->common.fc.coef_probs_8x8); + vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8, + cpi->common.fc.hybrid_coef_probs_8x8); + vp9_copy(cpi->common.fc.pre_coef_probs_16x16, + cpi->common.fc.coef_probs_16x16); + vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, + cpi->common.fc.hybrid_coef_probs_16x16); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS vp9_copy(cpi->common.fc.pre_coef_probs_32x32, cpi->common.fc.coef_probs_32x32); @@ -2362,27 +2304,22 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } #ifdef ENTROPY_STATS -void print_tree_update_probs() { +static void print_tree_update_for_type(FILE *f, + vp9_coeff_stats *tree_update_hist, + int block_types, const char *header) { int i, j, k, l; - FILE *f = fopen("coefupdprob.h", "w"); - int Sum; - fprintf(f, "\n/* Update probabilities for token entropy tree. 
*/\n\n"); - fprintf(f, "const vp9_prob\n" - "vp9_coef_update_probs[BLOCK_TYPES]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES; i++) { + fprintf(f, "const vp9_coeff_prob %s = {\n", header); + for (i = 0; i < block_types; i++) { fprintf(f, " { \n"); for (j = 0; j < COEF_BANDS; j++) { fprintf(f, " {\n"); for (k = 0; k < PREV_COEF_CONTEXTS; k++) { fprintf(f, " {"); for (l = 0; l < ENTROPY_NODES; l++) { - fprintf(f, "%3ld, ", - get_binary_prob(tree_update_hist[i][j][k][l][0], - tree_update_hist[i][j][k][l][1])); + fprintf(f, "%3d, ", + get_binary_prob(tree_update_hist[i][j][k][l][0], + tree_update_hist[i][j][k][l][1])); } fprintf(f, "},\n"); } @@ -2391,56 +2328,33 @@ void print_tree_update_probs() { fprintf(f, " },\n"); } fprintf(f, "};\n"); +} - fprintf(f, "const vp9_prob\n" - "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES_8X8; i++) { - fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { - fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) { - fprintf(f, "%3ld, ", - get_binary_prob(tree_update_hist_8x8[i][j][k][l][0], - tree_update_hist_8x8[i][j][k][l][1])); - } - fprintf(f, "},\n"); - } - fprintf(f, " },\n"); - } - fprintf(f, " },\n"); - } +void print_tree_update_probs() { + FILE *f = fopen("coefupdprob.h", "w"); + fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n"); - fprintf(f, "const vp9_prob\n" - "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [ENTROPY_NODES] = {\n"); - for (i = 0; i < BLOCK_TYPES_16X16; i++) { - fprintf(f, " { \n"); - for (j = 0; j < COEF_BANDS; j++) { - fprintf(f, " {\n"); - for (k = 0; k < PREV_COEF_CONTEXTS; k++) { - fprintf(f, " {"); - for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) { - fprintf(f, "%3ld, ", - get_binary_prob(tree_update_hist_16x16[i][j][k][l][0], - tree_update_hist_16x16[i][j][k][l][1])); - } - fprintf(f, "},\n"); - } - fprintf(f, " },\n"); - } - fprintf(f, " },\n"); - } + print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES_4X4, + "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]"); + print_tree_update_for_type(f, hybrid_tree_update_hist_4x4, BLOCK_TYPES_4X4, + "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]"); + print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES_8X8, + "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]"); + print_tree_update_for_type(f, hybrid_tree_update_hist_8x8, BLOCK_TYPES_8X8, + "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]"); + print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES_16X16, + "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); + print_tree_update_for_type(f, hybrid_tree_update_hist_16x16, + BLOCK_TYPES_16X16, + "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); +#if CONFIG_TX32X32 + print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32, + "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]"); +#endif fclose(f); f = fopen("treeupdate.bin", "wb"); - fwrite(tree_update_hist, sizeof(tree_update_hist), 1, f); + fwrite(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f); fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); fclose(f); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 82dc5edc1..a50fc403a 100644 --- a/vp9/encoder/vp9_block.h +++ 
b/vp9/encoder/vp9_block.h @@ -173,10 +173,8 @@ typedef struct macroblock { unsigned char *active_ptr; - unsigned int token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - unsigned int hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4]; + vp9_coeff_count hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4]; int optimize; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f504fc53c..8b4e5bc9c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1485,8 +1485,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { xd->prev_mode_info_context = cm->prev_mi; vp9_zero(cpi->NMVcount); - vp9_zero(cpi->coef_counts); - vp9_zero(cpi->hybrid_coef_counts); + vp9_zero(cpi->coef_counts_4x4); + vp9_zero(cpi->hybrid_coef_counts_4x4); vp9_zero(cpi->coef_counts_8x8); vp9_zero(cpi->hybrid_coef_counts_8x8); vp9_zero(cpi->coef_counts_16x16); diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index c052e16b0..a3eeb29db 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -18,6 +18,7 @@ #ifdef ENTROPY_STATS extern void init_mv_ref_counts(); extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); +void print_mode_context(void); #endif diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 779534bac..77b21e890 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2223,11 +2223,11 @@ void vp9_remove_compressor(VP9_PTR *ptr) { fprintf(fmode, "[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES]" "[VP9_KF_BINTRAMODES] =\n{\n"); - for (i = 0; i < VP8_KF_BINTRAMODES; i++) { + for (i = 0; i < VP9_KF_BINTRAMODES; i++) { fprintf(fmode, " { // Above Mode : %d\n", i); - for (j = 0; j < VP8_KF_BINTRAMODES; j++) { + for (j = 0; j < VP9_KF_BINTRAMODES; j++) { fprintf(fmode, " {"); @@ -3691,10 +3691,12 @@ static void encode_frame_to_data_rate #endif update_reference_frames(cm); - vp9_copy(cpi->common.fc.coef_counts, cpi->coef_counts); - vp9_copy(cpi->common.fc.hybrid_coef_counts, cpi->hybrid_coef_counts); + vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4); + vp9_copy(cpi->common.fc.hybrid_coef_counts_4x4, + cpi->hybrid_coef_counts_4x4); vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8); - vp9_copy(cpi->common.fc.hybrid_coef_counts_8x8, cpi->hybrid_coef_counts_8x8); + vp9_copy(cpi->common.fc.hybrid_coef_counts_8x8, + cpi->hybrid_coef_counts_8x8); vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16, cpi->hybrid_coef_counts_16x16); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 28acc96d4..4c86b6c94 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -94,24 +94,14 @@ typedef struct { // 0 = BPRED, ZERO_MV, MV, SPLIT signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - vp9_prob coef_probs[BLOCK_TYPES] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - vp9_prob hybrid_coef_probs[BLOCK_TYPES] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - - vp9_prob coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - vp9_prob hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - - vp9_prob coef_probs_16x16[BLOCK_TYPES_16X16] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - vp9_prob hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] - 
[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; - + vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - vp9_prob coef_probs_32x32[BLOCK_TYPES_32X32] - [COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; + vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; #endif #if CONFIG_SUPERBLOCKS @@ -598,31 +588,31 @@ typedef struct VP9_COMP { nmv_context_counts NMVcount; - unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - unsigned int hybrid_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_hybrid_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_hybrid_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; + vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_probs frame_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]; + vp9_coeff_stats frame_hybrid_branch_ct_4x4[BLOCK_TYPES_4X4]; - unsigned int coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - unsigned int hybrid_coef_counts_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_hybrid_coef_probs_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_hybrid_branch_ct_8x8 [BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; + vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_probs frame_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; + vp9_coeff_stats frame_hybrid_branch_ct_8x8[BLOCK_TYPES_8X8]; - unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; - unsigned int hybrid_coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_hybrid_coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_hybrid_branch_ct_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; + vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES_16X16]; + 
vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; + vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 - unsigned int coef_counts_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ - vp9_prob frame_coef_probs_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - unsigned int frame_branch_ct_32x32 [BLOCK_TYPES_32X32] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32]; #endif int gfu_boost; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index c896e41b1..540a68094 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -169,8 +169,8 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->last_ref_lf_deltas, xd->last_ref_lf_deltas); vp9_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas); - vp9_copy(cc->coef_probs, cm->fc.coef_probs); - vp9_copy(cc->hybrid_coef_probs, cm->fc.hybrid_coef_probs); + vp9_copy(cc->coef_probs_4x4, cm->fc.coef_probs_4x4); + vp9_copy(cc->hybrid_coef_probs_4x4, cm->fc.hybrid_coef_probs_4x4); vp9_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8); vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8); vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); @@ -231,8 +231,8 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(xd->last_ref_lf_deltas, cc->last_ref_lf_deltas); vp9_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas); - vp9_copy(cm->fc.coef_probs, cc->coef_probs); - vp9_copy(cm->fc.hybrid_coef_probs, cc->hybrid_coef_probs); + vp9_copy(cm->fc.coef_probs_4x4, cc->coef_probs_4x4); + vp9_copy(cm->fc.hybrid_coef_probs_4x4, cc->hybrid_coef_probs_4x4); vp9_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8); vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8); vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 60f14f8fe..931e872c1 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -239,10 +239,9 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { }; #endif -static void fill_token_costs( - unsigned int (*c)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], - const vp9_prob(*p)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES], - int block_type_counts) { +static void fill_token_costs(vp9_coeff_count *c, + vp9_coeff_probs *p, + int block_type_counts) { int i, j, k; for (i = 0; i < block_type_counts; i++) @@ -370,41 +369,24 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { } } - fill_token_costs( - cpi->mb.token_costs[TX_4X4], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs, - BLOCK_TYPES); - fill_token_costs( - cpi->mb.hybrid_token_costs[TX_4X4], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) - cpi->common.fc.hybrid_coef_probs, - BLOCK_TYPES); + fill_token_costs(cpi->mb.token_costs[TX_4X4], + cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4); + fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4], + cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4); - fill_token_costs( - cpi->mb.token_costs[TX_8X8], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) 
cpi->common.fc.coef_probs_8x8, - BLOCK_TYPES_8X8); - fill_token_costs( - cpi->mb.hybrid_token_costs[TX_8X8], - (const vp9_prob( *)[8][PREV_COEF_CONTEXTS][11]) - cpi->common.fc.hybrid_coef_probs_8x8, - BLOCK_TYPES_8X8); + fill_token_costs(cpi->mb.token_costs[TX_8X8], + cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8); + fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8], + cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8); - fill_token_costs( - cpi->mb.token_costs[TX_16X16], - (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_16x16, - BLOCK_TYPES_16X16); - fill_token_costs( - cpi->mb.hybrid_token_costs[TX_16X16], - (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) - cpi->common.fc.hybrid_coef_probs_16x16, - BLOCK_TYPES_16X16); + fill_token_costs(cpi->mb.token_costs[TX_16X16], + cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16); + fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16], + cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - fill_token_costs( - cpi->mb.token_costs[TX_32X32], - (const vp9_prob(*)[8][PREV_COEF_CONTEXTS][11]) cpi->common.fc.coef_probs_32x32, - BLOCK_TYPES_32X32); + fill_token_costs(cpi->mb.token_costs[TX_32X32], + cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32); #endif /*rough estimate for costing*/ diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index a662e048e..9a0e8f3d9 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -25,27 +25,25 @@ compressions, then generating vp9_context.c = initial stats. */ #ifdef ENTROPY_STATS -INT64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -INT64 hybrid_context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4]; +vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4]; +vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8]; +vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8]; +vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16]; +vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16]; +#if CONFIG_TX32X32 +vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; +#endif -INT64 context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -INT64 hybrid_context_counters_8x8[BLOCK_TYPES_8X8] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - -INT64 context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -INT64 hybrid_context_counters_16x16[BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; - -extern unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -extern unsigned int hybrid_tree_update_hist[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -extern unsigned int tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -extern unsigned int hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -extern unsigned int tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; -extern unsigned int hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2]; +extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4]; +extern vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4]; +extern vp9_coeff_stats 
tree_update_hist_8x8[BLOCK_TYPES_8X8]; +extern vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8]; +extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16]; +extern vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16]; +#if CONFIG_TX32X32 +extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; +#endif #endif /* ENTROPY_STATS */ static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; @@ -123,8 +121,8 @@ static void tokenize_b(VP9_COMP *cpi, int seg_eob; int segment_id = xd->mode_info_context->mbmi.segment_id; const int *bands, *scan; - unsigned int (*counts)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - vp9_prob (*probs)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; + vp9_coeff_count *counts; + vp9_coeff_probs *probs; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type(xd, b) : DCT_DCT; @@ -136,16 +134,16 @@ static void tokenize_b(VP9_COMP *cpi, bands = vp9_coef_bands; scan = vp9_default_zig_zag1d; if (tx_type != DCT_DCT) { - counts = cpi->hybrid_coef_counts; - probs = cpi->common.fc.hybrid_coef_probs; + counts = cpi->hybrid_coef_counts_4x4; + probs = cpi->common.fc.hybrid_coef_probs_4x4; if (tx_type == ADST_DCT) { scan = vp9_row_scan; } else if (tx_type == DCT_ADST) { scan = vp9_col_scan; } } else { - counts = cpi->coef_counts; - probs = cpi->common.fc.coef_probs; + counts = cpi->coef_counts_4x4; + probs = cpi->common.fc.coef_probs_4x4; } break; case TX_8X8: @@ -514,40 +512,70 @@ void vp9_tokenize_mb(VP9_COMP *cpi, void init_context_counters(void) { FILE *f = fopen("context.bin", "rb"); if (!f) { - vpx_memset(context_counters, 0, sizeof(context_counters)); + vpx_memset(context_counters_4x4, 0, sizeof(context_counters_4x4)); + vpx_memset(hybrid_context_counters_4x4, 0, + sizeof(hybrid_context_counters_4x4)); vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); + vpx_memset(hybrid_context_counters_8x8, 0, + sizeof(hybrid_context_counters_8x8)); vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16)); + vpx_memset(hybrid_context_counters_16x16, 0, + sizeof(hybrid_context_counters_16x16)); +#if CONFIG_TX32X32 + vpx_memset(context_counters_32x32, 0, sizeof(context_counters_32x32)); +#endif } else { - fread(context_counters, sizeof(context_counters), 1, f); + fread(context_counters_4x4, sizeof(context_counters_4x4), 1, f); + fread(hybrid_context_counters_4x4, + sizeof(hybrid_context_counters_4x4), 1, f); fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f); + fread(hybrid_context_counters_8x8, + sizeof(hybrid_context_counters_8x8), 1, f); fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f); + fread(hybrid_context_counters_16x16, + sizeof(hybrid_context_counters_16x16), 1, f); +#if CONFIG_TX32X32 + fread(context_counters_32x32, sizeof(context_counters_32x32), 1, f); +#endif fclose(f); } f = fopen("treeupdate.bin", "rb"); if (!f) { - vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist)); + vpx_memset(tree_update_hist_4x4, 0, sizeof(tree_update_hist_4x4)); + vpx_memset(hybrid_tree_update_hist_4x4, 0, + sizeof(hybrid_tree_update_hist_4x4)); vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8)); + vpx_memset(hybrid_tree_update_hist_8x8, 0, + sizeof(hybrid_tree_update_hist_8x8)); vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16)); + vpx_memset(hybrid_tree_update_hist_16x16, 0, + sizeof(hybrid_tree_update_hist_16x16)); +#if CONFIG_TX32X32 + vpx_memset(tree_update_hist_32x32, 0, sizeof(tree_update_hist_32x32)); +#endif } else { - 
fread(tree_update_hist, sizeof(tree_update_hist), 1, f); + fread(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f); + fread(hybrid_tree_update_hist_4x4, + sizeof(hybrid_tree_update_hist_4x4), 1, f); fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f); + fread(hybrid_tree_update_hist_8x8, + sizeof(hybrid_tree_update_hist_8x8), 1, f); fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); + fread(hybrid_tree_update_hist_16x16, + sizeof(hybrid_tree_update_hist_16x16), 1, f); +#if CONFIG_TX32X32 + fread(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f); +#endif fclose(f); } } -void print_context_counters() { +static void print_counter(FILE *f, vp9_coeff_accum *context_counters, + int block_types, const char *header) { int type, band, pt, t; - FILE *f = fopen("vp9_context.c", "w"); - fprintf(f, "#include \"vp9_entropy.h\"\n"); - fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n"); - fprintf(f, "static const unsigned int\n" - "vp9_default_coef_counts[BLOCK_TYPES]\n" - " [COEF_BANDS]\n" - " [PREV_COEF_CONTEXTS]\n" - " [MAX_ENTROPY_TOKENS]={\n"); + fprintf(f, "static const vp9_coeff_count %s = {\n", header); # define Comma( X) (X? ",":"") type = 0; @@ -564,6 +592,7 @@ void print_context_counters() { do { const INT64 x = context_counters [type] [band] [pt] [t]; const int y = (int) x; + assert(x == (INT64) y); /* no overflow handling yet */ fprintf(f, "%s %d", Comma(t), y); } while (++t < MAX_ENTROPY_TOKENS); @@ -572,78 +601,16 @@ void print_context_counters() { fprintf(f, "\n }"); } while (++band < COEF_BANDS); fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES); + } while (++type < block_types); fprintf(f, "\n};\n"); +} - fprintf(f, "static const unsigned int\nvp9_default_coef_counts_8x8" - "[BLOCK_TYPES_8X8] [COEF_BANDS]" - "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - fprintf(f, "%s\n {", Comma(pt)); - t = 0; - do { - const INT64 x = context_counters_8x8 [type] [band] [pt] [t]; - const int y = (int) x; +static void print_probs(FILE *f, vp9_coeff_accum *context_counters, + int block_types, const char *header) { + int type, band, pt, t; - assert(x == (INT64) y); /* no overflow handling yet */ - fprintf(f, "%s %d", Comma(t), y); + fprintf(f, "static const vp9_coeff_probs %s = {\n", header); - } while (++t < MAX_ENTROPY_TOKENS); - - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - - fprintf(f, "\n }"); - - } while (++band < COEF_BANDS); - - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_8X8); - fprintf(f, "\n};\n"); - - fprintf(f, "static const unsigned int\nvp9_default_coef_counts_16x16" - "[BLOCK_TYPES_16X16] [COEF_BANDS]" - "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - fprintf(f, "%s\n {", Comma(pt)); - t = 0; - do { - const INT64 x = context_counters_16x16 [type] [band] [pt] [t]; - const int y = (int) x; - - assert(x == (INT64) y); /* no overflow handling yet */ - fprintf(f, "%s %d", Comma(t), y); - - } while (++t < MAX_ENTROPY_TOKENS); - - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - - fprintf(f, "\n }"); - - } while (++band < COEF_BANDS); - - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_16X16); - fprintf(f, "\n};\n"); - - fprintf(f, 
"static const vp9_prob\n" - "vp9_default_coef_probs[BLOCK_TYPES] [COEF_BANDS] \n" - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); type = 0; do { fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); @@ -655,17 +622,18 @@ void print_context_counters() { unsigned int branch_ct [ENTROPY_NODES] [2]; unsigned int coef_counts[MAX_ENTROPY_TOKENS]; vp9_prob coef_probs[ENTROPY_NODES]; + for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - coef_counts[t] = context_counters [type] [band] [pt] [t]; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, coef_counts, 256, 1); + coef_counts[t] = context_counters[type][band][pt][t]; + vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS, + vp9_coef_encodings, vp9_coef_tree, + coef_probs, branch_ct, coef_counts, + 256, 1); fprintf(f, "%s\n {", Comma(pt)); t = 0; do { fprintf(f, "%s %d", Comma(t), coef_probs[t]); - } while (++t < ENTROPY_NODES); fprintf(f, "}"); @@ -673,81 +641,67 @@ void print_context_counters() { fprintf(f, "\n }"); } while (++band < COEF_BANDS); fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES); + } while (++type < block_types); fprintf(f, "\n};\n"); +} - fprintf(f, "static const vp9_prob\n" - "vp9_default_coef_probs_8x8[BLOCK_TYPES_8X8] [COEF_BANDS]\n" - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; - unsigned int coef_counts[MAX_ENTROPY_TOKENS]; - vp9_prob coef_probs[ENTROPY_NODES]; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - coef_counts[t] = context_counters_8x8[type] [band] [pt] [t]; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, coef_counts, 256, 1); - fprintf(f, "%s\n {", Comma(pt)); +void print_context_counters() { + FILE *f = fopen("vp9_context.c", "w"); - t = 0; - do { - fprintf(f, "%s %d", Comma(t), coef_probs[t]); - } while (++t < ENTROPY_NODES); - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_8X8); - fprintf(f, "\n};\n"); + fprintf(f, "#include \"vp9_entropy.h\"\n"); + fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n"); - fprintf(f, "static const vp9_prob\n" - "vp9_default_coef_probs_16x16[BLOCK_TYPES_16X16] [COEF_BANDS]\n" - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {"); - type = 0; - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - band = 0; - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - pt = 0; - do { - unsigned int branch_ct [ENTROPY_NODES] [2]; - unsigned int coef_counts[MAX_ENTROPY_TOKENS]; - vp9_prob coef_probs[ENTROPY_NODES]; - for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) - coef_counts[t] = context_counters_16x16[type] [band] [pt] [t]; - vp9_tree_probs_from_distribution( - MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, - coef_probs, branch_ct, coef_counts, 256, 1); - fprintf(f, "%s\n {", Comma(pt)); + /* print counts */ + print_counter(f, context_counters_4x4, BLOCK_TYPES_4X4, + "vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]"); + print_counter(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4, + "vp9_default_hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]"); + print_counter(f, context_counters_8x8, BLOCK_TYPES_8X8, + "vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]"); + print_counter(f, 
hybrid_context_counters_8x8, BLOCK_TYPES_8X8, + "vp9_default_hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]"); + print_counter(f, context_counters_16x16, BLOCK_TYPES_16X16, + "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]"); + print_counter(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, + "vp9_default_hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]"); +#if CONFIG_TX32X32 + print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32, + "vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]"); +#endif - t = 0; - do { - fprintf(f, "%s %d", Comma(t), coef_probs[t]); - } while (++t < ENTROPY_NODES); - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - fprintf(f, "\n }"); - } while (++band < COEF_BANDS); - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES_16X16); - fprintf(f, "\n};\n"); + /* print coefficient probabilities */ + print_probs(f, context_counters_4x4, BLOCK_TYPES_4X4, + "vp9_default_coef_probs_4x4[BLOCK_TYPES_4X4]"); + print_probs(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4, + "vp9_default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]"); + print_probs(f, context_counters_8x8, BLOCK_TYPES_8X8, + "vp9_default_coef_probs_8x8[BLOCK_TYPES_8X8]"); + print_probs(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8, + "vp9_default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]"); + print_probs(f, context_counters_16x16, BLOCK_TYPES_16X16, + "vp9_default_coef_probs_16x16[BLOCK_TYPES_16X16]"); + print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, + "vp9_default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]"); +#if CONFIG_TX32X32 + print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32, + "vp9_default_coef_probs_32x32[BLOCK_TYPES_32X32]"); +#endif fclose(f); f = fopen("context.bin", "wb"); - fwrite(context_counters, sizeof(context_counters), 1, f); + fwrite(context_counters_4x4, sizeof(context_counters_4x4), 1, f); + fwrite(hybrid_context_counters_4x4, + sizeof(hybrid_context_counters_4x4), 1, f); fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f); + fwrite(hybrid_context_counters_8x8, + sizeof(hybrid_context_counters_8x8), 1, f); fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f); + fwrite(hybrid_context_counters_16x16, + sizeof(hybrid_context_counters_16x16), 1, f); +#if CONFIG_TX32X32 + fwrite(context_counters_32x32, sizeof(context_counters_32x32), 1, f); +#endif fclose(f); } #endif @@ -766,8 +720,8 @@ static __inline void stuff_b(VP9_COMP *cpi, TX_SIZE tx_size, int dry_run) { const int *bands; - unsigned int (*counts)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; - vp9_prob (*probs)[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; + vp9_coeff_count *counts; + vp9_coeff_probs *probs; int pt, band; TOKENEXTRA *t = *tp; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
@@ -779,11 +733,11 @@ static __inline void stuff_b(VP9_COMP *cpi,
    case TX_4X4:
      bands = vp9_coef_bands;
      if (tx_type != DCT_DCT) {
-        counts = cpi->hybrid_coef_counts;
-        probs = cpi->common.fc.hybrid_coef_probs;
+        counts = cpi->hybrid_coef_counts_4x4;
+        probs = cpi->common.fc.hybrid_coef_probs_4x4;
      } else {
-        counts = cpi->coef_counts;
-        probs = cpi->common.fc.coef_probs;
+        counts = cpi->coef_counts_4x4;
+        probs = cpi->common.fc.coef_probs_4x4;
      }
      break;
    case TX_8X8:
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index cfd5db694..b72d5c8c2 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -29,6 +29,9 @@ typedef struct {
   unsigned char skip_eob_node;
 } TOKENEXTRA;
 
+typedef INT64 vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS]
+                             [MAX_ENTROPY_TOKENS];
+
 extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block);
 extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
 extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block);
@@ -62,12 +65,14 @@ extern void vp9_fix_contexts_sb(MACROBLOCKD *xd);
 void init_context_counters();
 void print_context_counters();
 
-extern INT64 context_counters[BLOCK_TYPES][COEF_BANDS]
-                             [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-extern INT64 context_counters_8x8[BLOCK_TYPES_8X8][COEF_BANDS]
-                                 [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
-extern INT64 context_counters_16x16[BLOCK_TYPES_16X16][COEF_BANDS]
-                                   [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4];
+extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8];
+extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16];
+extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
+
+extern vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4];
+extern vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8];
+extern vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16];
 #endif
 
 extern const int *vp9_dct_value_cost_ptr;

From fbf052df4246bb4886d055f0fcda7c97de90d360 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje"
Date: Fri, 7 Dec 2012 16:26:25 -0800
Subject: [PATCH 11/77] Clean up 4x4 coefficient decoding code.

Don't use vp9_decode_coefs_4x4() for 2nd order DC or luma blocks. The
code introduces some overhead which is unnecessary for these cases.
Also, remove variable declarations that are only used once, remove
magic offsets into the coefficient buffer (use xd->block[i].qcoeff
instead of xd->qcoeff + magic_offset), and fix a few Google Style
Guide violations.
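
To make the magic-offset point concrete, the idea can be sketched as
follows. This is an illustrative sketch only, not code from the patch:
the types BLOCKD_SKETCH and MACROBLOCKD_SKETCH and the helper
setup_block_ptrs() are hypothetical stand-ins, and the usual layout of
25 blocks of 16 coefficients per macroblock is assumed.

    #include <stdint.h>

    /* Hypothetical stand-ins for the decoder structures: each 4x4
     * block carries a pointer into the shared coefficient buffer. */
    typedef struct {
      int16_t *qcoeff;              /* per-block view into the buffer */
    } BLOCKD_SKETCH;

    typedef struct {
      int16_t qcoeff[25 * 16];      /* 16 luma + 8 chroma + 1 Y2 block */
      BLOCKD_SKETCH block[25];
    } MACROBLOCKD_SKETCH;

    /* The offset arithmetic lives in exactly one place... */
    static void setup_block_ptrs(MACROBLOCKD_SKETCH *xd) {
      int i;
      for (i = 0; i < 25; i++)
        xd->block[i].qcoeff = xd->qcoeff + i * 16;
    }

    /* ...so callers write xd->block[i].qcoeff instead of repeating
     * the magic expression xd->qcoeff + i * 16 at every decode site. */

Keeping the arithmetic behind a single initializer means the buffer
layout can change without touching every caller, which is what the
hunks below do for the 4x4 decode path.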
Change-Id: I0ae653fd80ca7f1e4bccd87ecef95ddfff8f28b4
---
 vp9/decoder/vp9_detokenize.c | 92 ++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 29 deletions(-)

diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 46ccf627f..7434bd5f9 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -429,50 +429,77 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
   return eobtotal;
 }
 
-int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
-                         BOOL_DECODER* const bc,
-                         PLANE_TYPE type, int i) {
+static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
+                            BOOL_DECODER* const bc,
+                            PLANE_TYPE type, int i, int seg_eob,
+                            TX_TYPE tx_type, const int *scan) {
   ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
   ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
   ENTROPY_CONTEXT *const a = A + vp9_block2above[i];
   ENTROPY_CONTEXT *const l = L + vp9_block2left[i];
-  INT16 *qcoeff_ptr = &xd->qcoeff[0];
-  const int *scan = vp9_default_zig_zag1d;
   unsigned short *const eobs = xd->eobs;
-  int segment_id = xd->mode_info_context->mbmi.segment_id;
-  int c, seg_eob = get_eob(xd, segment_id, 16);
-  TX_TYPE tx_type = DCT_DCT;
+  int c;
+
+  c = decode_coefs(dx, xd, bc, a, l, type, tx_type, seg_eob,
+                   xd->block[i].qcoeff, scan, TX_4X4, vp9_coef_bands);
+  eobs[i] = c;
+
+  return c;
+}
+
+static int decode_coefs_4x4_y(VP9D_COMP *dx, MACROBLOCKD *xd,
+                              BOOL_DECODER* const bc,
+                              PLANE_TYPE type, int i, int seg_eob) {
+  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+                          get_tx_type(xd, &xd->block[i]) : DCT_DCT;
+  const int *scan;
 
-  if (type == PLANE_TYPE_Y_WITH_DC)
-    tx_type = get_tx_type_4x4(xd, &xd->block[i]);
   switch (tx_type) {
-    case ADST_DCT :
+    case ADST_DCT:
      scan = vp9_row_scan;
      break;
-
-    case DCT_ADST :
+    case DCT_ADST:
      scan = vp9_col_scan;
      break;
-
-    default :
+    default:
      scan = vp9_default_zig_zag1d;
      break;
  }
-  eobs[i] = c = decode_coefs(dx, xd, bc, a, l, type,
-                             tx_type, seg_eob, qcoeff_ptr + i * 16,
-                             scan, TX_4X4, vp9_coef_bands);
-  return c;
+
+  return decode_coefs_4x4(dx, xd, bc, type, i, seg_eob, tx_type, scan);
+}
+
+int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
+                         BOOL_DECODER* const bc,
+                         PLANE_TYPE type, int i) {
+  const int segment_id = xd->mode_info_context->mbmi.segment_id;
+  const int seg_eob = get_eob(xd, segment_id, 16);
+
+  return decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob);
+}
+
+static int decode_mb_tokens_4x4_uv(VP9D_COMP* const dx,
+                                   MACROBLOCKD* const xd,
+                                   BOOL_DECODER* const bc,
+                                   int seg_eob) {
+  int eobtotal = 0, i;
+
+  // chroma blocks
+  for (i = 16; i < 24; i++) {
+    eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob,
+                                 DCT_DCT, vp9_default_zig_zag1d);
+  }
+
+  return eobtotal;
 }
 
 int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx,
                                 MACROBLOCKD* const xd,
                                 BOOL_DECODER* const bc) {
-  int eobtotal = 0, i;
+  const int segment_id = xd->mode_info_context->mbmi.segment_id;
+  const int seg_eob = get_eob(xd, segment_id, 16);
 
-  for (i = 16; i < 24; i++)
-    eobtotal += vp9_decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i);
-
-  return eobtotal;
+  return decode_mb_tokens_4x4_uv(dx, xd, bc, seg_eob);
 }
 
 static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx,
@@ -480,11 +507,14 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx,
                                     BOOL_DECODER* const bc) {
   int i, eobtotal = 0;
   PLANE_TYPE type;
+  const int segment_id = xd->mode_info_context->mbmi.segment_id;
+  const int seg_eob = get_eob(xd, segment_id, 16);
+  const int has_2nd_order = get_2nd_order_usage(xd);
-  int has_2nd_order = get_2nd_order_usage(xd);
-
+  // 2nd order DC block
   if (has_2nd_order) {
-    eobtotal += vp9_decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24) - 16;
+    eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24, seg_eob,
+                                 DCT_DCT, vp9_default_zig_zag1d) - 16;
     type = PLANE_TYPE_Y_NO_DC;
   } else {
     xd->above_context->y2 = 1;
@@ -493,11 +523,15 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx,
     type = PLANE_TYPE_Y_WITH_DC;
   }
 
+  // luma blocks
   for (i = 0; i < 16; ++i) {
-    eobtotal += vp9_decode_coefs_4x4(dx, xd, bc, type, i);
+    eobtotal += decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob);
   }
 
-  return eobtotal + vp9_decode_mb_tokens_4x4_uv(dx, xd, bc);
+  // chroma blocks
+  eobtotal += decode_mb_tokens_4x4_uv(dx, xd, bc, seg_eob);
+
+  return eobtotal;
 }
 
 int vp9_decode_mb_tokens(VP9D_COMP* const dx,

From 6f014dc5ad805411a7fe14b627e564a5cccda20f Mon Sep 17 00:00:00 2001
From: John Koleszar
Date: Thu, 6 Dec 2012 13:56:25 -0800
Subject: [PATCH 12/77] libvpx_test: ensure rtcd init functions are called

In addition to allowing tests to use the RTCD-enabled functions
(perhaps transitively) without having run a full encode/decode test
yet, this fixes a linking issue with Apple's G++ whereby the Common
symbols (the function pointers themselves) wouldn't be resolved.
Fixing this linking issue is the primary impetus for this patch, as
none of the tests exercise the RTCD functionality except through the
main API.

Change-Id: I12aed91ca37a707e5309aa6cb9c38a649c06bc6a
---
 test/test_libvpx.cc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index 2b9b0c21f..52a4fb9d5 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -12,6 +12,12 @@
 #if ARCH_X86 || ARCH_X86_64
 extern "C" {
 #include "vpx_ports/x86.h"
+#if CONFIG_VP8
+extern void vp8_rtcd();
+#endif
+#if CONFIG_VP9
+extern void vp9_rtcd();
+#endif
 }
 #endif
 #include "third_party/googletest/src/include/gtest/gtest.h"
@@ -41,5 +47,12 @@ int main(int argc, char **argv) {
     append_gtest_filter(":-SSE4_1/*");
 #endif
 
+#if CONFIG_VP8
+  vp8_rtcd();
+#endif
+#if CONFIG_VP9
+  vp9_rtcd();
+#endif
+
   return RUN_ALL_TESTS();
 }

From ab480cede582e1ef102a0e04b61cb755df4781b7 Mon Sep 17 00:00:00 2001
From: Yaowu Xu
Date: Wed, 28 Nov 2012 15:15:51 -0800
Subject: [PATCH 13/77] experiment with CONTEXT conversion

This commit changed the ENTROPY_CONTEXT conversion between MBs that
have different transform sizes. In addition, this commit also made a
number of cleanups and bug fixes:
1. removed the duplicate function vp9_fix_contexts() and changed both
   the encoder and the decoder to use vp8_reset_mb_token_contexts()
2. fixed a bug in stuff_mb_16x16 where the wrong context was used for
   the UV planes
3. changed to reset all contexts to 0 if a MB is skipped, to simplify
   the logic
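
The conversion pattern itself is visible in the detokenize hunks that
follow: when an 8x8 or 16x16 transform spans several 4x4 context
slots, the slots are first collapsed into a single "any nonzero
coefficients?" flag, and the decoded context is then fanned back out
to every covered slot. A minimal sketch of the idea, with hypothetical
helper names (merge_ctx_8x8, spread_ctx_8x8) and ENTROPY_CONTEXT
standing in for the real typedef:

    typedef char ENTROPY_CONTEXT;

    /* Sketch only: merge two 4x4 context slots into one 8x8 context,
     * nonzero if either neighboring 4x4 block had coefficients;
     * compare the CONFIG_CNVCONTEXT expressions such as
     * (a[0] + a[1]) != 0 in the hunks below. */
    static ENTROPY_CONTEXT merge_ctx_8x8(const ENTROPY_CONTEXT *c) {
      return (ENTROPY_CONTEXT)((c[0] + c[1]) != 0);
    }

    /* After decoding, replicate the single result back to every
     * covered slot, mirroring assignments like a[1] = a[0] = above_ec
     * in the decoder below. */
    static void spread_ctx_8x8(ENTROPY_CONTEXT *c, ENTROPY_CONTEXT ec) {
      c[0] = c[1] = ec;
    }

Only "were there any coefficients along this edge" survives the merge,
which is also what motivates the simpler reset-to-0 rule for skipped
MBs described above.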
Change-Id: I7bc57a5fb6dbf1f85eac1543daaeb3a61633275c
---
 configure                     |   1 +
 vp9/common/vp9_blockd.h       |   2 -
 vp9/common/vp9_entropy.h      |   6 ++
 vp9/decoder/vp9_decodframe.c  |   5 +-
 vp9/decoder/vp9_detokenize.c  |  87 ++++++++-------
 vp9/encoder/vp9_encodeframe.c |   5 +-
 vp9/encoder/vp9_encodemb.c    |  68 ++++++------
 vp9/encoder/vp9_tokenize.c    | 192 ++++++++++++++++++++++------------
 vp9/encoder/vp9_tokenize.h    |   2 -
 9 files changed, 228 insertions(+), 140 deletions(-)

diff --git a/configure b/configure
index c93ffd75f..55add837c 100755
--- a/configure
+++ b/configure
@@ -249,6 +249,7 @@ EXPERIMENT_LIST="
     comp_interintra_pred
     tx32x32
     dwt32x32hybrid
+    cnvcontext
 "
 CONFIG_LIST="
     external_build
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 11efd4475..7440f5d6b 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -102,13 +102,11 @@ typedef enum
   TM_PRED,    /* Truemotion prediction */
   I8X8_PRED,  /* 8x8 based prediction, each 8x8 has its own prediction mode */
   B_PRED,     /* block based prediction, each block has its own prediction mode */
-
   NEARESTMV,
   NEARMV,
   ZEROMV,
   NEWMV,
   SPLITMV,
-
   MB_MODE_COUNT
 } MB_PREDICTION_MODE;
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 6ec044606..4262b3030 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -129,4 +129,10 @@ void vp9_coef_tree_initialize(void);
 
 void vp9_adapt_coef_probs(struct VP9Common *);
 
+static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
+  /* Clear entropy contexts */
+  vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+  vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+}
+
 #endif
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index ffdf9f371..d72d08698 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -14,6 +14,7 @@
 #include "vp9/common/vp9_reconintra.h"
 #include "vp9/common/vp9_reconintra4x4.h"
 #include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_entropy.h"
 #include "vp9/decoder/vp9_decodframe.h"
 #include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/common/vp9_invtrans.h"
@@ -443,12 +444,12 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
      vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
                           *(b->base_dst) + b->dst, 16, b->dst_stride);
    }
-    xd->above_context->y2 = 1;
-    xd->left_context->y2 = 1;
  }
  if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
    vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
  }
+  xd->above_context->y2 = 0;
+  xd->left_context->y2 = 0;
  vp9_build_intra_predictors_mbuv(xd);
  pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 7434bd5f9..32746d5a0 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -15,7 +15,6 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 #include "vp9/decoder/vp9_detokenize.h"
-
 #include "vp9/common/vp9_seg_common.h"
 
 #define EOB_CONTEXT_NODE 0
@@ -59,22 +58,6 @@ static const unsigned char cat6_prob[15] = {
   254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
 };
 
-void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
-  /* Clear entropy contexts */
-  if ((xd->mode_info_context->mbmi.mode != B_PRED &&
-       xd->mode_info_context->mbmi.mode != I8X8_PRED &&
-       xd->mode_info_context->mbmi.mode != SPLITMV)
-      || xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
-    vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
-    vpx_memset(xd->left_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES)); - } else { - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; - } -} - DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]); static int get_signed(BOOL_DECODER *br, int value_to_sign) { @@ -321,15 +304,23 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, unsigned short* const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; int c, i, eobtotal = 0, seg_eob; - // Luma block - eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, PLANE_TYPE_Y_WITH_DC, + +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; + ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; +#else + ENTROPY_CONTEXT above_ec = A[0]; + ENTROPY_CONTEXT left_ec = L[0]; +#endif + eobs[0] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, + PLANE_TYPE_Y_WITH_DC, get_tx_type(xd, &xd->block[0]), get_eob(xd, segment_id, 256), xd->qcoeff, vp9_default_zig_zag1d_16x16, TX_16X16, vp9_coef_bands_16x16); - A[1] = A[2] = A[3] = A[0]; - L[1] = L[2] = L[3] = L[0]; + A[1] = A[2] = A[3] = A[0] = above_ec; + L[1] = L[2] = L[3] = L[0] = left_ec; eobtotal += c; // 8x8 chroma blocks @@ -337,13 +328,21 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, for (i = 16; i < 24; i += 4) { ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i]; ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i]; - - eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, +#if CONFIG_CNVCONTEXT + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; +#else + above_ec = a[0]; + left_ec = l[0]; +#endif + eobs[i] = c = decode_coefs(pbi, xd, bc, + &above_ec, &left_ec, + PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, vp9_default_zig_zag1d_8x8, TX_8X8, vp9_coef_bands_8x8); - a[1] = a[0]; - l[1] = l[0]; + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; eobtotal += c; } A[8] = 0; @@ -374,8 +373,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, eobtotal += c - 4; type = PLANE_TYPE_Y_NO_DC; } else { - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; eobs[24] = 0; type = PLANE_TYPE_Y_WITH_DC; } @@ -385,15 +384,21 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, for (i = 0; i < 16; i += 4) { ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i]; ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i]; - - eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, type, +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif + eobs[i] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, type, type == PLANE_TYPE_Y_WITH_DC ? 
get_tx_type(xd, xd->block + i) : DCT_DCT, seg_eob, xd->block[i].qcoeff, vp9_default_zig_zag1d_8x8, TX_8X8, vp9_coef_bands_8x8); - a[1] = a[0]; - l[1] = l[0]; + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; eobtotal += c; } @@ -415,13 +420,21 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, for (i = 16; i < 24; i += 4) { ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i]; ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i]; - - eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif + eobs[i] = c = decode_coefs(pbi, xd, bc, + &above_ec, &left_ec, + PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, vp9_default_zig_zag1d_8x8, TX_8X8, vp9_coef_bands_8x8); - a[1] = a[0]; - l[1] = l[0]; + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; eobtotal += c; } } @@ -517,8 +530,8 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx, DCT_DCT, vp9_default_zig_zag1d) - 16; type = PLANE_TYPE_Y_NO_DC; } else { - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; xd->eobs[24] = 0; type = PLANE_TYPE_Y_WITH_DC; } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 8b4e5bc9c..b46cf2180 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -16,6 +16,7 @@ #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/common/vp9_extend.h" +#include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/encoder/vp9_segmentation.h" @@ -2287,7 +2288,7 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mb_skip_coeff = 1; if (output_enabled) cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts(xd); + vp9_reset_mb_tokens_context(xd); } else { vp9_stuff_mb(cpi, xd, t, !output_enabled); mbmi->mb_skip_coeff = 0; @@ -2525,7 +2526,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->common.mb_no_coeff_skip) { // TODO(rbultje) this should be done per-sb instead of per-mb? cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts(xd); + vp9_reset_mb_tokens_context(xd); } else { vp9_stuff_mb(cpi, xd, t, 0); // TODO(rbultje) this should be done per-sb instead of per-mb? diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 46087c28e..f1ccda2f2 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -727,14 +727,21 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b += 4) { - optimize_b(x, b, type, - ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], - TX_8X8); - ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]]; - tl[vp9_block2left_8x8[b] + 1] = tl[vp9_block2left_8x8[b]]; + ENTROPY_CONTEXT *const a = ta + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = tl + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif + optimize_b(x, b, type, &above_ec, &left_ec, TX_8X8); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } - // 8x8 always have 2nd roder haar block + // 8x8 always have 2nd order block if (has_2nd_order) { check_reset_8x8_2nd_coeffs(&x->e_mbd, ta + vp9_block2above_8x8[24], @@ -744,25 +751,23 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { int b; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; + ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)x->e_mbd.above_context; + ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)x->e_mbd.left_context; - if (!x->e_mbd.above_context || !x->e_mbd.left_context) + if (!ta || !tl) return; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - for (b = 16; b < 24; b += 4) { - optimize_b(x, b, PLANE_TYPE_UV, - ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], - TX_8X8); - ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]]; - tl[vp9_block2left_8x8[b] + 1] = tl[vp9_block2left_8x8[b]]; + ENTROPY_CONTEXT *const a = ta + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = tl + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif + optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8); } } @@ -772,18 +777,21 @@ static void optimize_mb_8x8(MACROBLOCK *x) { } void vp9_optimize_mby_16x16(MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta, *tl; + ENTROPY_CONTEXT_PLANES *const t_above = x->e_mbd.above_context; + ENTROPY_CONTEXT_PLANES *const t_left = x->e_mbd.left_context; + ENTROPY_CONTEXT ta, tl; - if (!x->e_mbd.above_context || !x->e_mbd.left_context) + if (!t_above || !t_left) return; - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); +#if CONFIG_CNVCONTEXT + ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; + tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; +#else + ta = t_above->y1[0]; + tl = t_left->y1[0]; +#endif + optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16); } static void optimize_mb_16x16(MACROBLOCK *x) { diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 9a0e8f3d9..4d9e2f748 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -423,7 +423,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, if (!cpi->common.mb_no_coeff_skip) { vp9_stuff_mb(cpi, 
xd, t, dry_run); } else { - vp9_fix_contexts(xd); + vp9_reset_mb_tokens_context(xd); } if (dry_run) *t = t_backup; @@ -434,45 +434,60 @@ void vp9_tokenize_mb(VP9_COMP *cpi, cpi->skip_false_count[mb_skip_context] += skip_inc; if (has_2nd_order) { - if (tx_size == TX_8X8) { - tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], - TX_8X8, dry_run); - } else { - tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above[24], L + vp9_block2left[24], - TX_4X4, dry_run); - } - + tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, + A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], + tx_size, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; plane_type = PLANE_TYPE_Y_WITH_DC; } if (tx_size == TX_16X16) { +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; + ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; +#else + ENTROPY_CONTEXT above_ec = A[0]; + ENTROPY_CONTEXT left_ec = L[0]; +#endif tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, - A, L, TX_16X16, dry_run); - A[1] = A[2] = A[3] = A[0]; - L[1] = L[2] = L[3] = L[0]; - + &above_ec, &left_ec, TX_16X16, dry_run); + A[1] = A[2] = A[3] = A[0] = above_ec; + L[1] = L[2] = L[3] = L[0] = left_ec; for (b = 16; b < 24; b += 4) { + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; +#else + above_ec = a[0]; + left_ec = l[0]; +#endif tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + &above_ec, &left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } A[8] = 0; L[8] = 0; } else if (tx_size == TX_8X8) { for (b = 0; b < 16; b += 4) { + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif tokenize_b(cpi, xd, xd->block + b, t, plane_type, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + &above_ec, &left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } if (xd->mode_info_context->mbmi.mode == I8X8_PRED || xd->mode_info_context->mbmi.mode == SPLITMV) { @@ -483,11 +498,19 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } } else { for (b = 16; b < 24; b += 4) { + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + &above_ec, 
&left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } } } else { @@ -794,66 +817,97 @@ static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, TX_8X8, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; +#if CONFIG_CNVCONTEXT + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; +#endif plane_type = PLANE_TYPE_Y_WITH_DC; } for (b = 0; b < 16; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above_8x8[b], - L + vp9_block2left_8x8[b], TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif + stuff_b(cpi, xd, xd->block + b, t, plane_type, + &above_ec, &left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } for (b = 16; b < 24; b += 4) { + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + &above_ec, &left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } } static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)xd->left_context; + ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; + ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; int b; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; + ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; +#else + ENTROPY_CONTEXT above_ec = A[0]; + ENTROPY_CONTEXT left_ec = L[0]; +#endif + stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, + &above_ec, &left_ec, TX_16X16, dry_run); + A[1] = A[2] = A[3] = A[0] = above_ec; + L[1] = L[2] = L[3] = L[0] = left_ec; - stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, A, L, TX_16X16, dry_run); - A[1] = A[2] = A[3] = A[0]; - L[1] = L[2] = L[3] = L[0]; for (b = 16; b < 24; b += 4) { + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; +#else + above_ec = a[0]; + left_ec = l[0]; +#endif stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above_8x8[b], - L + vp9_block2above_8x8[b], TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + &above_ec, &left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } - vpx_memset(&A[8], 0, sizeof(A[8])); - vpx_memset(&L[8], 0, sizeof(L[8])); + A[8] = 0; + L[8] = 0; } static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { 
- ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; + ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; + ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; int b; PLANE_TYPE plane_type; - int has_2nd_order = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV); - if (has_2nd_order && get_tx_type(xd, &xd->block[0]) != DCT_DCT) - has_2nd_order = 0; + int has_2nd_order = get_2nd_order_usage(xd); if (has_2nd_order) { stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above[24], L + vp9_block2left[24], TX_4X4, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; plane_type = PLANE_TYPE_Y_WITH_DC; } @@ -880,22 +934,30 @@ static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, TX_8X8, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; plane_type = PLANE_TYPE_Y_WITH_DC; } for (b = 0; b < 16; b += 4) { + ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; + ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; +#if CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; + ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; +#else + ENTROPY_CONTEXT above_ec = a[0]; + ENTROPY_CONTEXT left_ec = l[0]; +#endif stuff_b(cpi, xd, xd->block + b, t, plane_type, - A + vp9_block2above_8x8[b], L + vp9_block2left_8x8[b], - TX_8X8, dry_run); - A[vp9_block2above_8x8[b] + 1] = A[vp9_block2above_8x8[b]]; - L[vp9_block2left_8x8[b] + 1] = L[vp9_block2left_8x8[b]]; + &above_ec, &left_ec, TX_8X8, dry_run); + a[1] = a[0] = above_ec; + l[1] = l[0] = left_ec; } for (b = 16; b < 24; b++) stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b], L + vp9_block2left[b], TX_4X4, dry_run); - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; } void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index b72d5c8c2..17ff9b32c 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -56,11 +56,9 @@ extern void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); #endif -extern void vp9_fix_contexts(MACROBLOCKD *xd); #if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 extern void vp9_fix_contexts_sb(MACROBLOCKD *xd); #endif - #ifdef ENTROPY_STATS void init_context_counters(); void print_context_counters(); From d1244659757ff64212f7beebc6c8e45909d5113c Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Wed, 5 Dec 2012 16:23:38 +0000 Subject: [PATCH 14/77] Further changes to mv reference code. Some further changes and refactoring of the mv reference code and of how the center point for searches is selected. Mainly this avoids passing so many different local copies of the same values around. Also adds some placeholder comments.
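As a rough sketch of the shape this moves toward (all names simplified: mv_ref_state is a hypothetical stand-in for the fields that actually live on MACROBLOCK and MB_MODE_INFO, and int_mv is pared down), the sorted candidate list is kept in one place, the best reference mv is just its first entry, and the encoder-side search center is remembered as an index into the same list rather than as yet another copied vector:

    /* Pared-down illustration, not the library's real definitions. */
    typedef union { unsigned int as_int; struct { short row, col; } as_mv; } int_mv;
    #define MAX_REF_FRAMES 4
    #define MAX_MV_REFS 4

    typedef struct {
      int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS]; /* sorted best-first */
      int mv_best_ref_index[MAX_REF_FRAMES];       /* set by mv_pred() */
    } mv_ref_state;

    /* The nominal best reference mv is simply the first (best) entry. */
    static int_mv best_ref_mv(const mv_ref_state *s, int frame) {
      return s->ref_mvs[frame][0];
    }

    /* The motion-search center is whichever candidate the encode-side
     * refinement in mv_pred() preferred. */
    static int_mv search_center(const mv_ref_state *s, int frame) {
      return s->ref_mvs[frame][s->mv_best_ref_index[frame]];
    }

Passing one shared list plus an index around is what lets the patch below drop the best_mv, frame_best_ref_mv and mv_search_ref copies from so many call sites.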
Change-Id: I309f10ffe9a9cde7663e7eae19eb594371c8d055 --- vp9/common/vp9_findnearmv.c | 6 +-- vp9/common/vp9_findnearmv.h | 1 - vp9/decoder/vp9_decodemv.c | 59 +++++++++++----------- vp9/encoder/vp9_bitstream.c | 95 +++++++++-------------------------- vp9/encoder/vp9_block.h | 2 + vp9/encoder/vp9_encodeframe.c | 1 + vp9/encoder/vp9_onyx_if.c | 1 - vp9/encoder/vp9_onyx_int.h | 1 - vp9/encoder/vp9_rdopt.c | 91 ++++++++++++++++----------------- 9 files changed, 101 insertions(+), 156 deletions(-) diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 4fd468973..903d9047e 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -127,7 +127,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, int_mv *mvlist, - int_mv *best_mv, int_mv *nearest, int_mv *near) { int i, j; @@ -144,7 +143,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int zero_seen = FALSE; // Default all to 0,0 if nothing else available - best_mv->as_int = nearest->as_int = near->as_int = 0; + nearest->as_int = near->as_int = 0; vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs)); #if CONFIG_SUBPELREFMV @@ -272,9 +271,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, clamp_mv2(&sorted_mvs[i], xd); } - // Set the best mv to the first entry in the sorted list - best_mv->as_int = sorted_mvs[0].as_int; - // Provided that there are non zero vectors available there will not // be more than one 0,0 entry in the sorted list. // The best ref mv is always set to the first entry (which gave the best diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index 8dab55300..be55b2ad8 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -25,7 +25,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, int_mv *mvlist, - int_mv *best_mv, int_mv *nearest, int_mv *near); diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index b9f411dd2..f36a22409 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -765,10 +765,10 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int recon_y_stride, recon_yoffset; int recon_uv_stride, recon_uvoffset; + MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; { int ref_fb_idx; - MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; /* Select the appropriate reference frame for this MB */ if (ref_frame == LAST_FRAME) @@ -801,10 +801,13 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, xd->pre.y_buffer, recon_y_stride, mbmi->ref_mvs[ref_frame], - &best_mv, &nearest, &nearby); + &nearest, &nearby); vp9_mv_ref_probs(&pbi->common, mv_ref_p, mbmi->mb_mode_context[ref_frame]); + + best_mv = mbmi->ref_mvs[ref_frame][0]; + #ifdef DEC_DEBUG if (dec_debug) printf("[D %d %d] %d %d %d %d\n", ref_frame, @@ -826,7 +829,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode = read_mv_ref(bc, mv_ref_p); vp9_accum_mv_refs(&pbi->common, mbmi->mode, - mbmi->mb_mode_context[mbmi->ref_frame]); + mbmi->mb_mode_context[ref_frame]); } #if CONFIG_PRED_FILTER @@ -890,9 +893,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, xd->second_pre.y_buffer, recon_y_stride, mbmi->ref_mvs[mbmi->second_ref_frame], - &best_mv_second, &nearest_second, &nearby_second); + best_mv_second = mbmi->ref_mvs[mbmi->second_ref_frame][0]; } } else { @@ -925,6 +928,29 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO 
*mbmi, #endif } +#if CONFIG_NEW_MVREF + // if ((mbmi->mode == NEWMV) || (mbmi->mode == SPLITMV)) + if (mbmi->mode == NEWMV) { + int best_index; + MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; + + // Encode the index of the choice. + best_index = + vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]); + + best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; + + if (mbmi->second_ref_frame > 0) { + ref_frame = mbmi->second_ref_frame; + + // Encode the index of the choice. + best_index = + vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]); + best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; + } + } +#endif + mbmi->uv_mode = DC_PRED; switch (mbmi->mode) { case SPLITMV: { @@ -1081,19 +1107,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, case NEWMV: -#if CONFIG_NEW_MVREF - { - int best_index; - MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame; - - // Encode the index of the choice. - best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]); - - best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; - } -#endif - read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc); read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc, xd->allow_high_precision_mv); @@ -1115,18 +1128,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mb_to_bottom_edge); if (mbmi->second_ref_frame > 0) { -#if CONFIG_NEW_MVREF - { - int best_index; - MV_REFERENCE_FRAME ref_frame = mbmi->second_ref_frame; - - // Encode the index of the choice. - best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]); - best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; - } -#endif - read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc); read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc, xd->allow_high_precision_mv); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 7cef1ae15..09d0e9929 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -956,21 +956,14 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { pc->fc.uv_mode_prob[mode]); } } else { - int_mv best_mv, best_second_mv; - vp9_prob mv_ref_p [VP9_MVREFS - 1]; - { - best_mv.as_int = mi->ref_mvs[rf][0].as_int; + vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); - vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); // #ifdef ENTROPY_STATS -// accum_mv_refs(mode, ct); -// #endif - } - #ifdef ENTROPY_STATS + accum_mv_refs(mode, ct); active_section = 3; #endif @@ -1012,13 +1005,6 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { } } - if (mi->second_ref_frame > 0 && - (mode == NEWMV || mode == SPLITMV)) { - - best_second_mv.as_int = - mi->ref_mvs[mi->second_ref_frame][0].as_int; - } - // does the feature use compound prediction or not // (if not specified at the frame/segment level) if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { @@ -1047,64 +1033,37 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { } #endif +#if CONFIG_NEW_MVREF + // if ((mode == NEWMV) || (mode == SPLITMV)) { + if (mode == NEWMV) { + // Encode the index of the choice. + vp9_write_mv_ref_id(bc, + xd->mb_mv_ref_id_probs[rf], mi->best_index); + cpi->best_ref_index_counts[rf][mi->best_index]++; + + if (mi->second_ref_frame > 0) { + // Encode the index of the choice. 
+ vp9_write_mv_ref_id( + bc, xd->mb_mv_ref_id_probs[mi->second_ref_frame], + mi->best_second_index); + + cpi->best_ref_index_counts[mi->second_ref_frame] + [mi->best_second_index]++; + } + } +#endif { switch (mode) { /* new, split require MVs */ case NEWMV: #ifdef ENTROPY_STATS active_section = 5; #endif - -#if CONFIG_NEW_MVREF - { - unsigned int best_index; - - // Choose the best mv reference - /* - best_index = pick_best_mv_ref(x, rf, mi->mv[0], - mi->ref_mvs[rf], &best_mv); - assert(best_index == mi->best_index); - assert(best_mv.as_int == mi->best_mv.as_int); - */ - best_index = mi->best_index; - best_mv.as_int = mi->best_mv.as_int; - - // Encode the index of the choice. - vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_id_probs[rf], best_index); - - cpi->best_ref_index_counts[rf][best_index]++; - - } -#endif - - write_nmv(bc, &mi->mv[0].as_mv, &best_mv, + write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); if (mi->second_ref_frame > 0) { -#if CONFIG_NEW_MVREF - unsigned int best_index; - sec_ref_frame = mi->second_ref_frame; - - /* - best_index = - pick_best_mv_ref(x, sec_ref_frame, mi->mv[1], - mi->ref_mvs[sec_ref_frame], - &best_second_mv); - assert(best_index == mi->best_second_index); - assert(best_second_mv.as_int == mi->best_second_mv.as_int); - */ - best_index = mi->best_second_index; - best_second_mv.as_int = mi->best_second_mv.as_int; - - // Encode the index of the choice. - vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_id_probs[sec_ref_frame], - best_index); - - cpi->best_ref_index_counts[sec_ref_frame][best_index]++; -#endif - write_nmv(bc, &mi->mv[1].as_mv, &best_second_mv, + write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); } @@ -1148,14 +1107,14 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { #ifdef ENTROPY_STATS active_section = 11; #endif - write_nmv(bc, &blockmv.as_mv, &best_mv, + write_nmv(bc, &blockmv.as_mv, &mi->best_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); if (mi->second_ref_frame > 0) { write_nmv(bc, &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &best_second_mv, + &mi->best_second_mv, (const nmv_context*) nmvc, xd->allow_high_precision_mv); } @@ -1167,10 +1126,6 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { break; } } - /* This is not required if the counts in cpi are consistent with the - * final packing pass */ - // if (!cpi->dummy_packing) - // vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv); } if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index a50fc403a..4ad095fb9 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -135,6 +135,8 @@ typedef struct macroblock { int *mb_norm_activity_ptr; signed int act_zbin_adj; + int mv_best_ref_index[MAX_REF_FRAMES]; + int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; int *nmvcost[2]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 8b4e5bc9c..ecf73a9b3 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -422,6 +422,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, } } + // best_index = x->mv_best_ref_index[ref_frame]; best_ref->as_int = mv_ref_list[best_index].as_int; return best_index; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 77b21e890..bdc39c1af 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -668,7 +668,6 @@ void 
vp9_set_speed_features(VP9_COMP *cpi) { sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->improved_mv_pred = 1; // default thresholds to 0 for (i = 0; i < MAX_MODES; i++) diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 4c86b6c94..2406138f6 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -359,7 +359,6 @@ typedef struct { int first_step; int optimize_coefficients; int no_skip_block4x4_search; - int improved_mv_pred; int search_best_filter; } SPEED_FEATURES; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 931e872c1..9b87713f9 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2916,7 +2916,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, unsigned char *ref_y_buffer, int ref_y_stride, - int_mv *mvp, int ref_frame, enum BlockSize block_size ) { + int ref_frame, enum BlockSize block_size ) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int_mv this_mv; @@ -2956,9 +2956,8 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, } } - // Return the mv that had the best sad for use in the motion search. - mvp->as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; - clamp_mv2(mvp, xd); + // Note the index of the mv that worked best in the reference list. + x->mv_best_ref_index[ref_frame] = best_index; } static void set_i8x8_block_modes(MACROBLOCK *x, int modes[2][4]) { @@ -3140,9 +3139,6 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->best_ref_mv.as_int = ref_mv->as_int; ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; - // ctx[mb_index].rddiv = x->rddiv; - // ctx[mb_index].rdmult = x->rdmult; - ctx->single_pred_diff = comp_pred_diff[SINGLE_PREDICTION_ONLY]; ctx->comp_pred_diff = comp_pred_diff[COMP_PREDICTION_ONLY]; ctx->hybrid_pred_diff = comp_pred_diff[HYBRID_PREDICTION]; @@ -3185,8 +3181,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], - int_mv frame_best_ref_mv[MAX_REF_FRAMES], - int_mv mv_search_ref[MAX_REF_FRAMES], int frame_mdcounts[4][4], unsigned char *y_buffer[4], unsigned char *u_buffer[4], @@ -3210,7 +3204,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, vp9_find_best_ref_mvs(xd, y_buffer[frame_type], yv12->y_stride, mbmi->ref_mvs[frame_type], - &frame_best_ref_mv[frame_type], &frame_nearest_mv[frame_type], &frame_near_mv[frame_type]); @@ -3218,7 +3211,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // Further refinement that is encode side only to test the top few candidates // in full and choose the best as the centre point for subsequent searches. 
mv_pred(cpi, x, y_buffer[frame_type], yv12->y_stride, - &mv_search_ref[frame_type], frame_type, block_size); + frame_type, block_size); #if CONFIG_NEW_MVREF // TODO(paulwilkins): Final choice of which of the best 4 candidates from @@ -3240,9 +3233,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate_uv, int *distortion_uv, int *mode_excluded, int *disable_skip, int recon_yoffset, int mode_index, - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int_mv frame_best_ref_mv[MAX_REF_FRAMES], - int_mv mv_search_ref[MAX_REF_FRAMES]) { + int_mv frame_mv[MB_MODE_COUNT] + [MAX_REF_FRAMES]) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -3258,20 +3250,25 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int refs[2] = { mbmi->ref_frame, (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) }; int_mv cur_mv[2]; + int_mv ref_mv[2]; int64_t this_rd = 0; switch (this_mode) { case NEWMV: + ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; + ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; + // ref_mv[0] = mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]]; + // ref_mv[1] = mbmi->ref_mvs[refs[1]][x->mv_best_ref_index[refs[1]]]; if (is_comp_pred) { if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) return INT64_MAX; *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]], - &frame_best_ref_mv[refs[0]], + &ref_mv[0], x->nmvjointcost, x->mvcost, 96, x->e_mbd.allow_high_precision_mv); *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]], - &frame_best_ref_mv[refs[1]], + &ref_mv[1], x->nmvjointcost, x->mvcost, 96, x->e_mbd.allow_high_precision_mv); } else { @@ -3286,10 +3283,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - vp9_clamp_mv_min_max(x, &frame_best_ref_mv[refs[0]]); + vp9_clamp_mv_min_max(x, &ref_mv[0]); - mvp_full.as_mv.col = mv_search_ref[mbmi->ref_frame].as_mv.col >> 3; - mvp_full.as_mv.row = mv_search_ref[mbmi->ref_frame].as_mv.row >> 3; + mvp_full.as_int = + mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int; + mvp_full.as_mv.col >>= 3; + mvp_full.as_mv.row >>= 3; + if (mvp_full.as_int != mvp_full.as_int) { + mvp_full.as_int = mvp_full.as_int; + } // adjust search range according to sr from mv prediction step_param = MAX(step_param, sr); @@ -3300,7 +3302,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, bestsme = vp9_full_pixel_diamond(cpi, x, b, d, &mvp_full, step_param, sadpb, further_steps, 1, &cpi->fn_ptr[block_size], - &frame_best_ref_mv[refs[0]], &tmp_mv); + &ref_mv[0], &tmp_mv); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -3311,7 +3313,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int dis; /* TODO: use dis in distortion calculation later. 
*/ unsigned int sse; cpi->find_fractional_mv_step(x, b, d, &tmp_mv, - &frame_best_ref_mv[refs[0]], + &ref_mv[0], x->errorperbit, &cpi->fn_ptr[block_size], x->nmvjointcost, x->mvcost, @@ -3321,7 +3323,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int; // Add the new motion vector cost to our rolling cost variable - *rate2 += vp9_mv_bit_cost(&tmp_mv, &frame_best_ref_mv[refs[0]], + *rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0], x->nmvjointcost, x->mvcost, 96, xd->allow_high_precision_mv); } @@ -3554,8 +3556,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int saddone = 0; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int_mv frame_best_ref_mv[MAX_REF_FRAMES]; - int_mv mv_search_ref[MAX_REF_FRAMES]; int frame_mdcounts[4][4]; unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4]; @@ -3586,25 +3586,22 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->ref_frame_flags & VP9_LAST_FLAG) { setup_buffer_inter(cpi, x, cpi->common.lst_fb_idx, LAST_FRAME, BLOCK_16X16, recon_yoffset, recon_uvoffset, - frame_mv[NEARESTMV], frame_mv[NEARMV], frame_best_ref_mv, - mv_search_ref, frame_mdcounts, - y_buffer, u_buffer, v_buffer); + frame_mv[NEARESTMV], frame_mv[NEARMV], + frame_mdcounts, y_buffer, u_buffer, v_buffer); } if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { setup_buffer_inter(cpi, x, cpi->common.gld_fb_idx, GOLDEN_FRAME, BLOCK_16X16, recon_yoffset, recon_uvoffset, - frame_mv[NEARESTMV], frame_mv[NEARMV], frame_best_ref_mv, - mv_search_ref, frame_mdcounts, - y_buffer, u_buffer, v_buffer); + frame_mv[NEARESTMV], frame_mv[NEARMV], + frame_mdcounts, y_buffer, u_buffer, v_buffer); } if (cpi->ref_frame_flags & VP9_ALT_FLAG) { setup_buffer_inter(cpi, x, cpi->common.alt_fb_idx, ALTREF_FRAME, BLOCK_16X16, recon_yoffset, recon_uvoffset, - frame_mv[NEARESTMV], frame_mv[NEARMV], frame_best_ref_mv, - mv_search_ref, frame_mdcounts, - y_buffer, u_buffer, v_buffer); + frame_mv[NEARESTMV], frame_mv[NEARMV], + frame_mdcounts, y_buffer, u_buffer, v_buffer); } *returnintra = INT64_MAX; @@ -3720,7 +3717,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->pre.y_buffer = y_buffer[ref]; xd->pre.u_buffer = u_buffer[ref]; xd->pre.v_buffer = v_buffer[ref]; - best_ref_mv = frame_best_ref_mv[ref]; + best_ref_mv = mbmi->ref_mvs[ref][0]; vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts)); } @@ -3730,7 +3727,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->second_pre.y_buffer = y_buffer[ref]; xd->second_pre.u_buffer = u_buffer[ref]; xd->second_pre.v_buffer = v_buffer[ref]; - second_best_ref_mv = frame_best_ref_mv[ref]; + second_best_ref_mv = mbmi->ref_mvs[ref][0]; } // Experimental code. Special case for gf and arf zeromv modes. @@ -3982,8 +3979,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &rate_y, &distortion, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, recon_yoffset, - mode_index, frame_mv, frame_best_ref_mv, - mv_search_ref); + mode_index, frame_mv); if (this_rd == INT64_MAX) continue; } @@ -4299,9 +4295,9 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, end: store_coding_context( x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition, - &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame], - &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame < 0 ? - 0 : xd->mode_info_context->mbmi.second_ref_frame], + &mbmi->ref_mvs[mbmi->ref_frame][0], + &mbmi->ref_mvs[mbmi->second_ref_frame < 0 + ? 
0 : mbmi->second_ref_frame][0], best_pred_diff, best_txfm_diff); } @@ -4504,8 +4500,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int_mv frame_best_ref_mv[MAX_REF_FRAMES]; - int_mv mv_search_ref[MAX_REF_FRAMES]; int frame_mdcounts[4][4]; unsigned char *y_buffer[4]; unsigned char *u_buffer[4]; @@ -4557,8 +4551,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->ref_frame_flags & flag_list[ref_frame]) { setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, BLOCK_32X32, recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV], - frame_mv[NEARMV], frame_best_ref_mv, mv_search_ref, - frame_mdcounts, y_buffer, u_buffer, v_buffer); + frame_mv[NEARMV], frame_mdcounts, + y_buffer, u_buffer, v_buffer); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; @@ -4750,8 +4744,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &rate_y, &distortion_y, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, recon_yoffset, - mode_index, frame_mv, frame_best_ref_mv, - mv_search_ref); + mode_index, frame_mv); if (this_rd == INT64_MAX) continue; } @@ -4996,9 +4989,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, end: store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, - &frame_best_ref_mv[mbmi->ref_frame], - &frame_best_ref_mv[mbmi->second_ref_frame < 0 ? - 0 : mbmi->second_ref_frame], + &mbmi->ref_mvs[mbmi->ref_frame][0], + &mbmi->ref_mvs[mbmi->second_ref_frame < 0 + ? 0 : mbmi->second_ref_frame][0], best_pred_diff, best_txfm_diff); return best_rd; From 14a38a87356875bb57206d333c4ec59409c4451e Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Mon, 10 Dec 2012 12:10:36 -0800 Subject: [PATCH 15/77] A bug fix related to switchable filters The switchable filter count update was mistakenly placed inside a conditional compilation (#if/#endif) block belonging to another experiment, so it was skipped whenever that experiment was disabled.
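Reduced to its essence, the problem was an #endif in the wrong place (judging from the interintra counts in the surrounding diff, the guard in question is presumably CONFIG_COMP_INTERINTRA_PRED; the functions below are a hypothetical reduction, not the real update_state()):

    /* Self-contained illustration of the misplaced-#endif bug. */
    #include <stdio.h>

    #define CONFIG_COMP_INTERINTRA_PRED 0 /* experiment disabled */

    static int switchable_count;

    static void update_counts_buggy(void) {
    #if CONFIG_COMP_INTERINTRA_PRED
      /* ... interintra count updates ... */
      switchable_count++; /* BUG: compiled out along with the experiment */
    #endif
    }

    static void update_counts_fixed(void) {
    #if CONFIG_COMP_INTERINTRA_PRED
      /* ... interintra count updates ... */
    #endif
      switchable_count++; /* updated unconditionally, as intended */
    }

    int main(void) {
      update_counts_buggy();
      printf("buggy: switchable_count = %d\n", switchable_count); /* 0 */
      update_counts_fixed();
      printf("fixed: switchable_count = %d\n", switchable_count); /* 1 */
      return 0;
    }

With the experiment disabled, the buggy variant never updates the switchable filter count at all, which is what the one-line #endif move below corrects.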
Change-Id: Iec04c52ad57034b88312dbaf05eee1f47ce265b3 --- vp9/encoder/vp9_encodeframe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 8b4e5bc9c..f5e9d7721 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -581,6 +581,7 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, ++cpi->interintra_count[0]; } } +#endif if (cpi->common.mcomp_filter_type == SWITCHABLE && mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) { @@ -588,7 +589,6 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] [vp9_switchable_interp_map[mbmi->interp_filter]]; } -#endif cpi->prediction_error += ctx->distortion; cpi->intra_error += ctx->intra_error; From 899f0fc1268dd5665770031f308a3ed52a0de956 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 6 Dec 2012 12:40:57 -0800 Subject: [PATCH 16/77] clean up tokenize_b() and stuff_b() Change-Id: I0c1be01aae933243311ad321b6c456adaec1a0f5 --- vp9/common/vp9_blockd.c | 27 ++-- vp9/common/vp9_blockd.h | 21 ++- vp9/decoder/vp9_detokenize.c | 24 ++-- vp9/encoder/vp9_encodemb.c | 24 ++-- vp9/encoder/vp9_rdopt.c | 88 ++++++------ vp9/encoder/vp9_tokenize.c | 269 ++++++++++++----------------------- 6 files changed, 188 insertions(+), 265 deletions(-) diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 60aedf8cb..4c88a4fa2 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -12,18 +12,19 @@ #include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" - -const unsigned char vp9_block2left[25] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +const unsigned char vp9_block2left[TX_SIZE_MAX_SB][25] = { + {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} +#endif }; -const unsigned char vp9_block2above[25] = { - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 +const unsigned char vp9_block2above[TX_SIZE_MAX_SB][25] = { + {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8}, + {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} +#endif }; - -const unsigned char vp9_block2left_8x8[25] = { - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8 -}; -const unsigned char vp9_block2above_8x8[25] = { - 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8 -}; - diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 7440f5d6b..4306eb02a 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -66,11 +66,6 @@ typedef struct { ENTROPY_CONTEXT y2; } ENTROPY_CONTEXT_PLANES; -extern const unsigned char vp9_block2left[25]; -extern const unsigned char vp9_block2above[25]; -extern const unsigned char vp9_block2left_8x8[25]; -extern const unsigned char vp9_block2above_8x8[25]; - #define VP9_COMBINEENTROPYCONTEXTS( Dest, A, B) \ Dest = ((A)!=0) + ((B)!=0); @@ -119,18 +114,17 @@ typedef enum { SEG_LVL_EOB = 4, // EOB end 
stop marker. SEG_LVL_TRANSFORM = 5, // Block transform size. SEG_LVL_MAX = 6 // Number of MB level features supported - } SEG_LVL_FEATURES; // Segment level features. typedef enum { - TX_4X4, // 4x4 dct transform - TX_8X8, // 8x8 dct transform - TX_16X16, // 16x16 dct transform - TX_SIZE_MAX_MB, // Number of transforms available to MBs + TX_4X4 = 0, // 4x4 dct transform + TX_8X8 = 1, // 8x8 dct transform + TX_16X16 = 2, // 16x16 dct transform + TX_SIZE_MAX_MB = 3, // Number of different transforms available #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform - TX_SIZE_MAX_SB, // Number of transforms available to SBs + TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform + TX_SIZE_MAX_SB, // Number of transforms available to SBs #else TX_SIZE_MAX_SB = TX_SIZE_MAX_MB, #endif @@ -508,6 +502,9 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { return tx_type; } +extern const unsigned char vp9_block2left[TX_SIZE_MAX_SB][25]; +extern const unsigned char vp9_block2above[TX_SIZE_MAX_SB][25]; + #define USE_ADST_FOR_I16X16_8X8 0 #define USE_ADST_FOR_I16X16_4X4 0 #define USE_ADST_FOR_I8X8_4X4 1 diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 32746d5a0..0c822d429 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -326,8 +326,8 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, // 8x8 chroma blocks seg_eob = get_eob(xd, segment_id, 64); for (i = 16; i < 24; i += 4) { - ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i]; - ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i]; + ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_8X8][i]; + ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_8X8][i]; #if CONFIG_CNVCONTEXT above_ec = (a[0] + a[1]) != 0; left_ec = (l[0] + l[1]) != 0; @@ -363,8 +363,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, int has_2nd_order = get_2nd_order_usage(xd); // 2nd order DC block if (has_2nd_order) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[24]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[24]; + ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][24]; + ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][24]; eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_Y2, DCT_DCT, get_eob(xd, segment_id, 4), @@ -382,8 +382,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, // luma blocks seg_eob = get_eob(xd, segment_id, 64); for (i = 0; i < 16; i += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i]; + ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][i]; + ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][i]; #if CONFIG_CNVCONTEXT ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; @@ -408,8 +408,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, // use 4x4 transform for U, V components in I8X8/splitmv prediction mode seg_eob = get_eob(xd, segment_id, 16); for (i = 16; i < 24; i++) { - ENTROPY_CONTEXT *const a = A + vp9_block2above[i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[i]; + ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i]; + ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i]; eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, @@ -418,8 +418,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, } } else { for (i = 16; i < 24; i += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[i]; - 
ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[i]; + ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][i]; + ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][i]; #if CONFIG_CNVCONTEXT ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; @@ -448,8 +448,8 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, TX_TYPE tx_type, const int *scan) { ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; - ENTROPY_CONTEXT *const a = A + vp9_block2above[i]; - ENTROPY_CONTEXT *const l = L + vp9_block2left[i]; + ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i]; + ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i]; unsigned short *const eobs = xd->eobs; int c; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index f1ccda2f2..497509995 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -671,15 +671,18 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) { for (b = 0; b < 16; b++) { optimize_b(x, b, type, - ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4); + ta + vp9_block2above[TX_4X4][b], + tl + vp9_block2left[TX_4X4][b], TX_4X4); } if (has_2nd_order) { b = 24; optimize_b(x, b, PLANE_TYPE_Y2, - ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4); + ta + vp9_block2above[TX_4X4][b], + tl + vp9_block2left[TX_4X4][b], TX_4X4); check_reset_2nd_coeffs(&x->e_mbd, - ta + vp9_block2above[b], tl + vp9_block2left[b]); + ta + vp9_block2above[TX_4X4][b], + tl + vp9_block2left[TX_4X4][b]); } } @@ -700,7 +703,8 @@ void vp9_optimize_mbuv_4x4(MACROBLOCK *x) { for (b = 16; b < 24; b++) { optimize_b(x, b, PLANE_TYPE_UV, - ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4); + ta + vp9_block2above[TX_4X4][b], + tl + vp9_block2left[TX_4X4][b], TX_4X4); } } @@ -727,8 +731,8 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b += 4) { - ENTROPY_CONTEXT *const a = ta + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = tl + vp9_block2left_8x8[b]; + ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; + ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; #if CONFIG_CNVCONTEXT ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; @@ -744,8 +748,8 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { // 8x8 always have 2nd order block if (has_2nd_order) { check_reset_8x8_2nd_coeffs(&x->e_mbd, - ta + vp9_block2above_8x8[24], - tl + vp9_block2left_8x8[24]); + ta + vp9_block2above[TX_8X8][24], + tl + vp9_block2left[TX_8X8][24]); } } @@ -758,8 +762,8 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { return; for (b = 16; b < 24; b += 4) { - ENTROPY_CONTEXT *const a = ta + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = tl + vp9_block2left_8x8[b]; + ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b]; + ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; #if CONFIG_CNVCONTEXT ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 931e872c1..3c72e2780 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -681,12 +681,14 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) { cost += cost_coeffs(mb, xd->block + b, (has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC), - ta + vp9_block2above[b], tl + vp9_block2left[b], + ta + vp9_block2above[TX_4X4][b], + tl + vp9_block2left[TX_4X4][b], TX_4X4); if (has_2nd_order) cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[24], tl + vp9_block2left[24], + ta + vp9_block2above[TX_4X4][24], + tl + vp9_block2left[TX_4X4][24], TX_4X4); return cost; @@ -739,12 +741,14 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) { cost += cost_coeffs(mb, xd->block + b, (has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC), - ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b], + ta + vp9_block2above[TX_8X8][b], + tl + vp9_block2left[TX_8X8][b], TX_8X8); if (has_2nd_order) cost += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2, - ta + vp9_block2above[24], tl + vp9_block2left[24]); + ta + vp9_block2above[TX_8X8][24], + tl + vp9_block2left[TX_8X8][24]); return cost; } @@ -1294,8 +1298,8 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat #if CONFIG_COMP_INTRA_PRED & best_second_mode, allow_comp, #endif - bmode_costs, ta + vp9_block2above[i], - tl + vp9_block2left[i], &r, &ry, &d); + bmode_costs, ta + vp9_block2above[TX_4X4][i], + tl + vp9_block2left[TX_4X4][i], &r, &ry, &d); cost += r; distortion += d; @@ -1527,8 +1531,8 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // compute quantization mse of 8x8 block distortion = vp9_block_error_c((x->block + idx)->coeff, (xd->block + idx)->dqcoeff, 64); - ta0 = a[vp9_block2above_8x8[idx]]; - tl0 = l[vp9_block2left_8x8[idx]]; + ta0 = a[vp9_block2above[TX_8X8][idx]]; + tl0 = l[vp9_block2left[TX_8X8][idx]]; rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, &ta0, &tl0, TX_8X8); @@ -1540,10 +1544,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, static const int iblock[4] = {0, 1, 4, 5}; TX_TYPE tx_type; int i; - ta0 = a[vp9_block2above[ib]]; - ta1 = a[vp9_block2above[ib + 1]]; - tl0 = l[vp9_block2left[ib]]; - tl1 = l[vp9_block2left[ib + 4]]; + ta0 = a[vp9_block2above[TX_4X4][ib]]; + ta1 = a[vp9_block2above[TX_4X4][ib + 1]]; + tl0 = l[vp9_block2left[TX_4X4][ib]]; + tl1 = l[vp9_block2left[TX_4X4][ib + 4]]; distortion = 0; rate_t = 0; for (i = 0; i < 4; ++i) { @@ -1596,15 +1600,15 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_encode_intra8x8(x, ib); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - a[vp9_block2above_8x8[idx]] = besta0; - a[vp9_block2above_8x8[idx] + 1] = besta1; - l[vp9_block2left_8x8[idx]] = bestl0; - l[vp9_block2left_8x8[idx] + 1] = bestl1; + a[vp9_block2above[TX_8X8][idx]] = besta0; + a[vp9_block2above[TX_8X8][idx] + 1] = besta1; + l[vp9_block2left[TX_8X8][idx]] = bestl0; + l[vp9_block2left[TX_8X8][idx] + 1] = bestl1; } else { - a[vp9_block2above[ib]] = besta0; - a[vp9_block2above[ib + 1]] = besta1; - l[vp9_block2left[ib]] = bestl0; - l[vp9_block2left[ib + 4]] = bestl1; + a[vp9_block2above[TX_4X4][ib]] = besta0; + a[vp9_block2above[TX_4X4][ib + 1]] = besta1; + l[vp9_block2left[TX_4X4][ib]] = bestl0; + l[vp9_block2left[TX_4X4][ib + 4]] = bestl1; } return best_rd; @@ -1681,7 +1685,8 @@ static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) { for (b = 16; b < 24; b++) cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, - ta + vp9_block2above[b], tl + vp9_block2left[b], + ta + vp9_block2above[TX_4X4][b], + tl + vp9_block2left[TX_4X4][b], TX_4X4); return cost; @@ -1721,8 +1726,8 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb, 
int backup) { for (b = 16; b < 24; b += 4) cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, - ta + vp9_block2above_8x8[b], - tl + vp9_block2left_8x8[b], TX_8X8); + ta + vp9_block2above[TX_8X8][b], + tl + vp9_block2left[TX_8X8][b], TX_8X8); return cost; } @@ -2244,8 +2249,8 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); *distortion += thisdistortion; *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[i], - tl + vp9_block2left[i], TX_4X4); + ta + vp9_block2above[TX_4X4][i], + tl + vp9_block2left[TX_4X4][i], TX_4X4); } } *distortion >>= 2; @@ -2296,8 +2301,9 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above_8x8[idx], - tlcp + vp9_block2left_8x8[idx], TX_8X8); + tacp + vp9_block2above[TX_8X8][idx], + tlcp + vp9_block2left[TX_8X8][idx], + TX_8X8); } for (j = 0; j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; @@ -2307,13 +2313,13 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); *distortion += thisdistortion; *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[ib + iblock[j]], - tl + vp9_block2left[ib + iblock[j]], - TX_4X4); + ta + vp9_block2above[TX_4X4][ib + iblock[j]], + tl + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[ib + iblock[j] + 1], - tl + vp9_block2left[ib + iblock[j]], - TX_4X4); + ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1], + tl + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); } } else /* 8x8 */ { if (otherrd) { @@ -2325,13 +2331,13 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[ib + iblock[j]], - tlcp + vp9_block2left[ib + iblock[j]], - TX_4X4); + tacp + vp9_block2above[TX_4X4][ib + iblock[j]], + tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[ib + iblock[j] + 1], - tlcp + vp9_block2left[ib + iblock[j]], - TX_4X4); + tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], + tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); } } x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32); @@ -2339,8 +2345,8 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above_8x8[idx], - tl + vp9_block2left_8x8[idx], TX_8X8); + ta + vp9_block2above[TX_8X8][idx], + tl + vp9_block2left[TX_8X8][idx], TX_8X8); } } } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 4d9e2f748..b21229e80 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -103,30 +103,33 @@ static void fill_value_tokens() { vp9_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE; vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } - static void tokenize_b(VP9_COMP *cpi, MACROBLOCKD *xd, - const BLOCKD * const b, + const int ib, TOKENEXTRA **tp, PLANE_TYPE type, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, TX_SIZE tx_size, int dry_run) { int pt; 
/* near block/prev token context index */ int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; + const BLOCKD * const b = xd->block + ib; int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ const short *qcoeff_ptr = b->qcoeff; int seg_eob; - int segment_id = xd->mode_info_context->mbmi.segment_id; + const int segment_id = xd->mode_info_context->mbmi.segment_id; const int *bands, *scan; vp9_coeff_count *counts; vp9_coeff_probs *probs; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type(xd, b) : DCT_DCT; - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); + ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above[tx_size][ib]; + ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + + vp9_block2left[tx_size][ib]; + ENTROPY_CONTEXT a_ec = *a, l_ec = *l; + switch (tx_size) { default: case TX_4X4: @@ -152,6 +155,10 @@ static void tokenize_b(VP9_COMP *cpi, bands = vp9_coef_bands; scan = vp9_default_zig_zag1d; } else { +#if CONFIG_CNVCONTEXT + a_ec = (a[0] + a[1]) != 0; + l_ec = (l[0] + l[1]) != 0; +#endif seg_eob = 64; bands = vp9_coef_bands_8x8; scan = vp9_default_zig_zag1d_8x8; @@ -165,6 +172,10 @@ static void tokenize_b(VP9_COMP *cpi, } break; case TX_16X16: +#if CONFIG_CNVCONTEXT + a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; +#endif seg_eob = 256; bands = vp9_coef_bands_16x16; scan = vp9_default_zig_zag1d_16x16; @@ -194,6 +205,8 @@ static void tokenize_b(VP9_COMP *cpi, #endif } + VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); + if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); @@ -204,7 +217,6 @@ static void tokenize_b(VP9_COMP *cpi, if (c < eob) { const int rc = scan[c]; const int v = qcoeff_ptr[rc]; - assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE); t->Extra = vp9_dct_value_tokens_ptr[v].Extra; @@ -226,7 +238,17 @@ static void tokenize_b(VP9_COMP *cpi, } while (c < eob && ++c < seg_eob); *tp = t; - *a = *l = (c > !type); /* 0 <-> all coeff data is zero */ + a_ec = l_ec = (c > !type); /* 0 <-> all coeff data is zero */ + a[0] = a_ec; + l[0] = l_ec; + + if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) { + a[1] = a_ec; + l[1] = l_ec; + } else if (tx_size == TX_16X16) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + } } int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) { @@ -380,8 +402,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi, int tx_size = xd->mode_info_context->mbmi.txfm_size; int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP); TOKENEXTRA *t_backup = *t; - ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *) xd->above_context; - ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *) xd->left_context; // If the MB is going to be skipped because of a segment level flag // exclude this from the skip count stats used to calculate the @@ -425,6 +445,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } else { vp9_reset_mb_tokens_context(xd); } + if (dry_run) *t = t_backup; return; @@ -434,9 +455,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, cpi->skip_false_count[mb_skip_context] += skip_inc; if (has_2nd_order) { - tokenize_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], - tx_size, dry_run); + tokenize_b(cpi, xd, 24, t, PLANE_TYPE_Y2, tx_size, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { xd->above_context->y2 = 0; @@ -445,85 +464,29 @@ void vp9_tokenize_mb(VP9_COMP *cpi, } if (tx_size == TX_16X16) { -#if CONFIG_CNVCONTEXT - 
ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; - ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; -#else - ENTROPY_CONTEXT above_ec = A[0]; - ENTROPY_CONTEXT left_ec = L[0]; -#endif - tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, - &above_ec, &left_ec, TX_16X16, dry_run); - A[1] = A[2] = A[3] = A[0] = above_ec; - L[1] = L[2] = L[3] = L[0] = left_ec; + tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); for (b = 16; b < 24; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; -#else - above_ec = a[0]; - left_ec = l[0]; -#endif - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); } - A[8] = 0; - L[8] = 0; } else if (tx_size == TX_8X8) { for (b = 0; b < 16; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - tokenize_b(cpi, xd, xd->block + b, t, plane_type, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + tokenize_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run); } if (xd->mode_info_context->mbmi.mode == I8X8_PRED || xd->mode_info_context->mbmi.mode == SPLITMV) { for (b = 16; b < 24; b++) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above[b], L + vp9_block2left[b], - TX_4X4, dry_run); + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); } } else { for (b = 16; b < 24; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); } } } else { - for (b = 0; b < 16; b++) { - tokenize_b(cpi, xd, xd->block + b, t, plane_type, - A + vp9_block2above[b], L + vp9_block2left[b], - TX_4X4, dry_run); - } - - for (b = 16; b < 24; b++) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A + vp9_block2above[b], L + vp9_block2left[b], - TX_4X4, dry_run); + for (b = 0; b < 24; b++) { + if (b >= 16) + plane_type = PLANE_TYPE_UV; + tokenize_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run); } } if (dry_run) @@ -735,13 +698,12 @@ void vp9_tokenize_initialize() { static __inline void stuff_b(VP9_COMP *cpi, MACROBLOCKD *xd, - const BLOCKD * const b, + const int ib, TOKENEXTRA **tp, PLANE_TYPE type, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, TX_SIZE tx_size, int dry_run) { + const BLOCKD * const b = xd->block + ib; const int *bands; vp9_coeff_count *counts; vp9_coeff_probs *probs; @@ -749,7 +711,12 @@ static __inline void stuff_b(VP9_COMP *cpi, TOKENEXTRA *t = *tp; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
get_tx_type(xd, b) : DCT_DCT; - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above[tx_size][ib]; + ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + + vp9_block2left[tx_size][ib]; + ENTROPY_CONTEXT a_ec = *a, l_ec = *l; switch (tx_size) { default: @@ -764,6 +731,12 @@ static __inline void stuff_b(VP9_COMP *cpi, } break; case TX_8X8: +#if CONFIG_CNVCONTEXT + if (type != PLANE_TYPE_Y2) { + a_ec = (a[0] + a[1]) != 0; + l_ec = (l[0] + l[1]) != 0; + } +#endif bands = vp9_coef_bands_8x8; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_8x8; @@ -774,6 +747,10 @@ static __inline void stuff_b(VP9_COMP *cpi, } break; case TX_16X16: +#if CONFIG_CNVCONTEXT + a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; +#endif bands = vp9_coef_bands_16x16; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_16x16; @@ -791,6 +768,9 @@ static __inline void stuff_b(VP9_COMP *cpi, break; #endif } + + VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); + band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0]; t->Token = DCT_EOB_TOKEN; t->context_tree = probs[type][band][pt]; @@ -798,6 +778,14 @@ static __inline void stuff_b(VP9_COMP *cpi, ++t; *tp = t; *a = *l = 0; + if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) { + a[1] = 0; + l[1] = 0; + } else if (tx_size == TX_16X16) { + a[1] = a[2] = a[3] = 0; + l[1] = l[2] = l[3] = 0; + } + if (!dry_run) { ++counts[type][band][pt][DCT_EOB_TOKEN]; } @@ -805,16 +793,12 @@ static __inline void stuff_b(VP9_COMP *cpi, static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; PLANE_TYPE plane_type; int b; int has_2nd_order = get_2nd_order_usage(xd); if (has_2nd_order) { - stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], - TX_8X8, dry_run); + stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { #if CONFIG_CNVCONTEXT @@ -824,86 +808,35 @@ static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, plane_type = PLANE_TYPE_Y_WITH_DC; } - for (b = 0; b < 16; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - stuff_b(cpi, xd, xd->block + b, t, plane_type, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; - } - - for (b = 16; b < 24; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + for (b = 0; b < 24; b += 4) { + if (b >= 16) + plane_type = PLANE_TYPE_UV; + stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run); } } static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; - 
ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; int b; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; - ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; -#else - ENTROPY_CONTEXT above_ec = A[0]; - ENTROPY_CONTEXT left_ec = L[0]; -#endif - stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, - &above_ec, &left_ec, TX_16X16, dry_run); - A[1] = A[2] = A[3] = A[0] = above_ec; - L[1] = L[2] = L[3] = L[0] = left_ec; + stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); for (b = 16; b < 24; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; -#else - above_ec = a[0]; - left_ec = l[0]; -#endif - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); } - A[8] = 0; - L[8] = 0; +#if CONFIG_CNVCONTEXT + xd->above_context->y2 = 0; + xd->left_context->y2 = 0; +#endif } static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; int b; PLANE_TYPE plane_type; int has_2nd_order = get_2nd_order_usage(xd); if (has_2nd_order) { - stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, A + vp9_block2above[24], - L + vp9_block2left[24], TX_4X4, dry_run); + stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_4X4, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { xd->above_context->y2 = 0; @@ -911,27 +844,21 @@ static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd, plane_type = PLANE_TYPE_Y_WITH_DC; } - for (b = 0; b < 16; b++) - stuff_b(cpi, xd, xd->block + b, t, plane_type, A + vp9_block2above[b], - L + vp9_block2left[b], TX_4X4, dry_run); - - for (b = 16; b < 24; b++) - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + vp9_block2above[b], - L + vp9_block2left[b], TX_4X4, dry_run); + for (b = 0; b < 24; b++) { + if (b >= 16) + plane_type = PLANE_TYPE_UV; + stuff_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run); + } } static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; - ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; PLANE_TYPE plane_type; int b; int has_2nd_order = get_2nd_order_usage(xd); if (has_2nd_order) { - stuff_b(cpi, xd, xd->block + 24, t, PLANE_TYPE_Y2, - A + vp9_block2above_8x8[24], L + vp9_block2left_8x8[24], - TX_8X8, dry_run); + stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run); plane_type = PLANE_TYPE_Y_NO_DC; } else { xd->above_context->y2 = 0; @@ -940,24 +867,12 @@ static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd, } for (b = 0; b < 16; b += 4) { - ENTROPY_CONTEXT *const a = A + vp9_block2above_8x8[b]; - ENTROPY_CONTEXT *const l = L + vp9_block2left_8x8[b]; -#if CONFIG_CNVCONTEXT - ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; - ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; -#else - ENTROPY_CONTEXT above_ec = a[0]; - ENTROPY_CONTEXT left_ec = l[0]; -#endif - stuff_b(cpi, xd, xd->block + b, t, plane_type, - &above_ec, &left_ec, TX_8X8, dry_run); - a[1] = a[0] = above_ec; - l[1] = l[0] = left_ec; + stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run); } - for (b = 16; b < 24; b++) - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, A + 
vp9_block2above[b], - L + vp9_block2left[b], TX_4X4, dry_run); + for (b = 16; b < 24; b++) { + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + } } void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { From 4d0ec7aacd2227b1b98d1f5100bde64c7797b962 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 10 Dec 2012 12:09:07 -0800 Subject: [PATCH 17/77] Consistently use get_prob(), clip_prob() and newly added clip_pixel(). Add a function clip_pixel() to clip a pixel value to the [0,255] range of allowed values, and use this wherever appropriate (e.g. prediction, reconstruction). Likewise, consistently use the recently added function clip_prob(), which calculates a binary probability in the [1,255] range. If possible, try to use get_prob() or its sister get_binary_prob() to calculate binary probabilities, for consistency. Since this changes binary probability calculations in some places (a range of places used {255,256} * count0 / total, and all of these now use (256 * count0 + (total >> 1)) / total), the encoding result changes, so this patch warrants some extensive testing.
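For illustration, a minimal standalone sketch of the new helper semantics (the three function bodies mirror the definitions this patch adds to vp9/common/vp9_treecoder.h below; the counts in main() are made-up example values, not codec data):

#include <stdio.h>

typedef unsigned char vp9_prob;

/* binary probabilities are kept in the [1,255] range */
static vp9_prob clip_prob(int p) {
  return (p > 255) ? 255 : (p < 1) ? 1 : p;
}

/* rounded, scaled ratio: (256 * num + den / 2) / den, then clipped */
static vp9_prob get_prob(int num, int den) {
  return (den == 0) ? 128 : clip_prob((num * 256 + (den >> 1)) / den);
}

/* blend an old estimate toward a new one with an 8-bit weight */
static vp9_prob weighted_prob(int prob1, int prob2, int factor) {
  return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8;
}

int main(void) {
  printf("%d\n", get_prob(30, 40));                         /* prints 192 */
  printf("%d\n", weighted_prob(150, get_prob(30, 40), 64)); /* prints 161 */
  return 0;
}

Note the rounding: for counts of 30 out of 40, get_prob() yields 192, where the old unrounded 255 * count0 / total form yielded 191.

Change-Id: Ibeeff8d886496839b8e0c0ace9ccc552351f7628 --- vp9/common/vp9_common.h | 6 +- vp9/common/vp9_entropy.c | 8 +- vp9/common/vp9_entropymode.c | 202 +++++++++++---------------- vp9/common/vp9_entropymv.c | 42 +++---- vp9/common/vp9_filter.c | 74 ++++-------- vp9/common/vp9_idctllm.c | 31 +---- vp9/common/vp9_pred_common.c | 20 +--- vp9/common/vp9_recon.c | 75 ++---------- vp9/common/vp9_reconinter.c | 10 +- vp9/common/vp9_reconintra.c | 26 ++--- vp9/common/vp9_reconintra4x4.c | 100 ++++++++-------- vp9/common/vp9_treecoder.c | 29 +---- vp9/common/vp9_treecoder.h | 27 +++-- vp9/decoder/vp9_dequantize.c | 19 +--- vp9/encoder/vp9_bitstream.c | 30 ++--- vp9/encoder/vp9_mcomp.c | 9 +- vp9/encoder/vp9_onyx_if.c | 41 +------ vp9/encoder/vp9_rdopt.c | 10 +- vp9/encoder/vp9_segmentation.c | 39 +------ vp9/encoder/vp9_tokenize.c | 3 +- 20 files changed, 236 insertions(+), 565 deletions(-) diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index 893a3f8b3..d6887afa0 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -17,7 +17,7 @@ /* Interface header for common constant data structures and lookup tables */ #include "vpx_mem/vpx_mem.h" - +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_common_types.h" /* Only need this for fixed-size arrays, for structs just assign. */ @@ -38,4 +38,8 @@ #define vp9_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest)); +static __inline uint8_t clip_pixel(int val) { + return (val > 255) ? 255u : (val < 0) ? 0u : val; +} + #endif /* common_h */ diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 9d8e924d5..60885f4e8 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -360,15 +360,13 @@ static void update_coef_probs(vp9_coeff_probs *dst_coef_probs, vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS, vp9_coef_encodings, vp9_coef_tree, coef_probs, branch_ct, - coef_counts[i][j][k], 256, 1); + coef_counts[i][j][k]); for (t = 0; t < ENTROPY_NODES; ++t) { - int prob; count = branch_ct[t][0] + branch_ct[t][1]; count = count > count_sat ?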
count_sat : count; factor = (update_factor * count / count_sat); - prob = ((int)pre_coef_probs[i][j][k][t] * (256 - factor) + - (int)coef_probs[t] * factor + 128) >> 8; - dst_coef_probs[i][j][k][t] = clip_prob(prob); + dst_coef_probs[i][j][k][t] = weighted_prob(pre_coef_probs[i][j][k][t], + coef_probs[t], factor); } } } diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index cda217670..d7c2d8e06 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -309,24 +309,24 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) { vp9_tree_probs_from_distribution(VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, x->fc.ymode_prob, - bct, y_mode_cts, 256, 1); + bct, y_mode_cts); #if CONFIG_SUPERBLOCKS vp9_tree_probs_from_distribution(VP9_I32X32_MODES, vp9_sb_ymode_encodings, vp9_sb_ymode_tree, x->fc.sb_ymode_prob, - bct, y_mode_cts, 256, 1); + bct, y_mode_cts); #endif { int i; for (i = 0; i < 8; i++) { vp9_tree_probs_from_distribution(VP9_YMODES, vp9_kf_ymode_encodings, vp9_kf_ymode_tree, x->kf_ymode_prob[i], - bct, kf_y_mode_cts[i], 256, 1); + bct, kf_y_mode_cts[i]); #if CONFIG_SUPERBLOCKS vp9_tree_probs_from_distribution(VP9_I32X32_MODES, vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree, x->sb_kf_ymode_prob[i], bct, - kf_y_mode_cts[i], 256, 1); + kf_y_mode_cts[i]); #endif } } @@ -335,16 +335,16 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) { for (i = 0; i < VP9_YMODES; i++) { vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings, vp9_uv_mode_tree, x->kf_uv_mode_prob[i], - bct, kf_uv_mode_cts[i], 256, 1); + bct, kf_uv_mode_cts[i]); vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings, vp9_uv_mode_tree, x->fc.uv_mode_prob[i], - bct, uv_mode_cts[i], 256, 1); + bct, uv_mode_cts[i]); } } vp9_tree_probs_from_distribution(VP9_I8X8_MODES, vp9_i8x8_mode_encodings, vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob, - bct, i8x8_mode_cts, 256, 1); + bct, i8x8_mode_cts); vpx_memcpy(x->fc.sub_mv_ref_prob, vp9_sub_mv_ref_prob2, sizeof(vp9_sub_mv_ref_prob2)); @@ -362,7 +362,7 @@ static void intra_bmode_probs_from_distribution( unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2], const unsigned int events[VP9_NKF_BINTRAMODES]) { vp9_tree_probs_from_distribution(VP9_NKF_BINTRAMODES, vp9_bmode_encodings, - vp9_bmode_tree, p, branch_ct, events, 256, 1); + vp9_bmode_tree, p, branch_ct, events); } void vp9_default_bmode_probs(vp9_prob p[VP9_NKF_BINTRAMODES - 1]) { @@ -375,7 +375,7 @@ static void intra_kf_bmode_probs_from_distribution( unsigned int branch_ct[VP9_KF_BINTRAMODES - 1][2], const unsigned int events[VP9_KF_BINTRAMODES]) { vp9_tree_probs_from_distribution(VP9_KF_BINTRAMODES, vp9_kf_bmode_encodings, - vp9_kf_bmode_tree, p, branch_ct, events, 256, 1); + vp9_kf_bmode_tree, p, branch_ct, events); } void vp9_kf_default_bmode_probs(vp9_prob p[VP9_KF_BINTRAMODES] @@ -495,17 +495,14 @@ void vp9_update_mode_context(VP9_COMMON *pc) { for (j = 0; j < INTER_MODE_CONTEXTS; j++) { for (i = 0; i < 4; i++) { - int this_prob; - int count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - int factor; - { - this_prob = count > 0 ? 256 * mv_ref_ct[j][i][0] / count : 128; - count = count > MVREF_COUNT_SAT ? MVREF_COUNT_SAT : count; - factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT); - this_prob = (pc->fc.vp9_mode_contexts[j][i] * (256 - factor) + - this_prob * factor + 128) >> 8; - mode_context[j][i] = clip_prob(this_prob); - } + int count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1], factor; + + count = count > MVREF_COUNT_SAT ? 
MVREF_COUNT_SAT : count; + factor = (MVREF_MAX_UPDATE_FACTOR * count / MVREF_COUNT_SAT); + mode_context[j][i] = weighted_prob(pc->fc.vp9_mode_contexts[j][i], + get_binary_prob(mv_ref_ct[j][i][0], + mv_ref_ct[j][i][1]), + factor); } } } @@ -531,25 +528,33 @@ void print_mode_contexts(VP9_COMMON *pc) { } #endif -// #define MODE_COUNT_TESTING #define MODE_COUNT_SAT 20 #define MODE_MAX_UPDATE_FACTOR 144 +static void update_mode_probs(int n_modes, struct vp9_token_struct *encoding, + const vp9_tree_index *tree, unsigned int *cnt, + vp9_prob *pre_probs, vp9_prob *dst_probs) { +#define MAX_PROBS 32 + vp9_prob probs[MAX_PROBS]; + unsigned int branch_ct[MAX_PROBS][2]; + int t, count, factor; + + assert(n_modes - 1 < MAX_PROBS); + vp9_tree_probs_from_distribution(n_modes, encoding, tree, probs, + branch_ct, cnt); + for (t = 0; t < n_modes - 1; ++t) { + count = branch_ct[t][0] + branch_ct[t][1]; + count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; + factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); + dst_probs[t] = weighted_prob(pre_probs[t], probs[t], factor); + } +} + +// #define MODE_COUNT_TESTING void vp9_adapt_mode_probs(VP9_COMMON *cm) { - int i, t, count, factor; - unsigned int branch_ct[32][2]; - vp9_prob ymode_probs[VP9_YMODES - 1]; -#if CONFIG_SUPERBLOCKS - vp9_prob sb_ymode_probs[VP9_I32X32_MODES - 1]; -#endif - vp9_prob uvmode_probs[VP9_UV_MODES - 1]; - vp9_prob bmode_probs[VP9_NKF_BINTRAMODES - 1]; - vp9_prob i8x8_mode_probs[VP9_I8X8_MODES - 1]; - vp9_prob sub_mv_ref_probs[VP9_SUBMVREFS - 1]; - vp9_prob mbsplit_probs[VP9_NUMMBSPLITS - 1]; -#if CONFIG_COMP_INTERINTRA_PRED - vp9_prob interintra_prob; -#endif + int i; #ifdef MODE_COUNT_TESTING + int t; + printf("static const unsigned int\nymode_counts" "[VP9_YMODES] = {\n"); for (t = 0; t < VP9_YMODES; ++t) printf("%d, ", cm->fc.ymode_counts[t]); @@ -590,116 +595,45 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { printf("};\n"); #endif #endif - vp9_tree_probs_from_distribution( - VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, - ymode_probs, branch_ct, cm->fc.ymode_counts, - 256, 1); - for (t = 0; t < VP9_YMODES - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_ymode_prob[t] * (256 - factor) + - (int)ymode_probs[t] * factor + 128) >> 8; - cm->fc.ymode_prob[t] = clip_prob(prob); - } + + update_mode_probs(VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, + cm->fc.ymode_counts, cm->fc.pre_ymode_prob, + cm->fc.ymode_prob); #if CONFIG_SUPERBLOCKS - vp9_tree_probs_from_distribution(VP9_I32X32_MODES, - vp9_sb_ymode_encodings, vp9_sb_ymode_tree, - sb_ymode_probs, branch_ct, - cm->fc.sb_ymode_counts, - 256, 1); - for (t = 0; t < VP9_I32X32_MODES - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_sb_ymode_prob[t] * (256 - factor) + - (int)sb_ymode_probs[t] * factor + 128) >> 8; - cm->fc.sb_ymode_prob[t] = clip_prob(prob); - } + update_mode_probs(VP9_I32X32_MODES, vp9_sb_ymode_encodings, vp9_sb_ymode_tree, + cm->fc.sb_ymode_counts, cm->fc.pre_sb_ymode_prob, + cm->fc.sb_ymode_prob); #endif for (i = 0; i < VP9_YMODES; ++i) { - vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings, - vp9_uv_mode_tree, uvmode_probs, branch_ct, - cm->fc.uv_mode_counts[i], 256, 1); - for (t = 0; t < VP9_UV_MODES - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_uv_mode_prob[i][t] * (256 - factor) + - (int)uvmode_probs[t] * factor + 128) >> 8; - cm->fc.uv_mode_prob[i][t] = clip_prob(prob); - } - } - vp9_tree_probs_from_distribution(VP9_NKF_BINTRAMODES, vp9_bmode_encodings, - vp9_bmode_tree, bmode_probs, branch_ct, - cm->fc.bmode_counts, 256, 1); - for (t = 0; t < VP9_NKF_BINTRAMODES - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_bmode_prob[t] * (256 - factor) + - (int)bmode_probs[t] * factor + 128) >> 8; - cm->fc.bmode_prob[t] = clip_prob(prob); - } - vp9_tree_probs_from_distribution(VP9_I8X8_MODES, vp9_i8x8_mode_encodings, - vp9_i8x8_mode_tree, i8x8_mode_probs, - branch_ct, cm->fc.i8x8_mode_counts, 256, 1); - for (t = 0; t < VP9_I8X8_MODES - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_i8x8_mode_prob[t] * (256 - factor) + - (int)i8x8_mode_probs[t] * factor + 128) >> 8; - cm->fc.i8x8_mode_prob[t] = clip_prob(prob); + update_mode_probs(VP9_UV_MODES, vp9_uv_mode_encodings, vp9_uv_mode_tree, + cm->fc.uv_mode_counts[i], cm->fc.pre_uv_mode_prob[i], + cm->fc.uv_mode_prob[i]); } + update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_encodings, vp9_bmode_tree, + cm->fc.bmode_counts, cm->fc.pre_bmode_prob, + cm->fc.bmode_prob); + update_mode_probs(VP9_I8X8_MODES, vp9_i8x8_mode_encodings, + vp9_i8x8_mode_tree, cm->fc.i8x8_mode_counts, + cm->fc.pre_i8x8_mode_prob, cm->fc.i8x8_mode_prob); for (i = 0; i < SUBMVREF_COUNT; ++i) { - vp9_tree_probs_from_distribution(VP9_SUBMVREFS, - vp9_sub_mv_ref_encoding_array, - vp9_sub_mv_ref_tree, sub_mv_ref_probs, - branch_ct, cm->fc.sub_mv_ref_counts[i], - 256, 1); - for (t = 0; t < VP9_SUBMVREFS - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_sub_mv_ref_prob[i][t] * (256 - factor) + - (int)sub_mv_ref_probs[t] * factor + 128) >> 8; - cm->fc.sub_mv_ref_prob[i][t] = clip_prob(prob); - } - } - vp9_tree_probs_from_distribution(VP9_NUMMBSPLITS, vp9_mbsplit_encodings, - vp9_mbsplit_tree, mbsplit_probs, branch_ct, - cm->fc.mbsplit_counts, 256, 1); - for (t = 0; t < VP9_NUMMBSPLITS - 1; ++t) { - int prob; - count = branch_ct[t][0] + branch_ct[t][1]; - count = count > MODE_COUNT_SAT ? 
MODE_COUNT_SAT : count; - factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_mbsplit_prob[t] * (256 - factor) + - (int)mbsplit_probs[t] * factor + 128) >> 8; - cm->fc.mbsplit_prob[t] = clip_prob(prob); + update_mode_probs(VP9_SUBMVREFS, vp9_sub_mv_ref_encoding_array, + vp9_sub_mv_ref_tree, cm->fc.sub_mv_ref_counts[i], + cm->fc.pre_sub_mv_ref_prob[i], cm->fc.sub_mv_ref_prob[i]); } + update_mode_probs(VP9_NUMMBSPLITS, vp9_mbsplit_encodings, vp9_mbsplit_tree, + cm->fc.mbsplit_counts, cm->fc.pre_mbsplit_prob, + cm->fc.mbsplit_prob); #if CONFIG_COMP_INTERINTRA_PRED if (cm->use_interintra) { - int prob; - interintra_prob = vp9_bin_prob_from_distribution(cm->fc.interintra_counts); + int factor, interintra_prob, count; + + interintra_prob = get_binary_prob(cm->fc.interintra_counts[0], + cm->fc.interintra_counts[1]); count = cm->fc.interintra_counts[0] + cm->fc.interintra_counts[1]; count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count; factor = (MODE_MAX_UPDATE_FACTOR * count / MODE_COUNT_SAT); - prob = ((int)cm->fc.pre_interintra_prob * (256 - factor) + - (int)interintra_prob * factor + 128) >> 8; - if (prob <= 0) - cm->fc.interintra_prob = 1; - else if (prob > 255) - cm->fc.interintra_prob = 255; - else - cm->fc.interintra_prob = prob; + cm->fc.interintra_prob = weighted_prob(cm->fc.pre_interintra_prob, + interintra_prob, factor); } #endif } diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index 94543c54d..99e3c2e8c 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -213,16 +213,12 @@ void vp9_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx, static void adapt_prob(vp9_prob *dest, vp9_prob prep, vp9_prob newp, unsigned int ct[2]) { - int factor; - int prob; int count = ct[0] + ct[1]; + if (count) { count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count; - factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); - prob = ((int)prep * (256 - factor) + (int)(newp) * factor + 128) >> 8; - prob += !prob; - prob = (prob > 255 ? 
255 : prob); - *dest = prob; + *dest = weighted_prob(prep, newp, + MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT); } } @@ -251,11 +247,10 @@ void vp9_counts_to_nmv_context( vp9_mv_joint_tree, prob->joints, branch_ct_joint, - NMVcount->joints, - 256, 1); + NMVcount->joints); for (i = 0; i < 2; ++i) { - prob->comps[i].sign = - vp9_bin_prob_from_distribution(NMVcount->comps[i].sign); + prob->comps[i].sign = get_binary_prob(NMVcount->comps[i].sign[0], + NMVcount->comps[i].sign[1]); branch_ct_sign[i][0] = NMVcount->comps[i].sign[0]; branch_ct_sign[i][1] = NMVcount->comps[i].sign[1]; vp9_tree_probs_from_distribution(MV_CLASSES, @@ -263,18 +258,16 @@ void vp9_counts_to_nmv_context( vp9_mv_class_tree, prob->comps[i].classes, branch_ct_classes[i], - NMVcount->comps[i].classes, - 256, 1); + NMVcount->comps[i].classes); vp9_tree_probs_from_distribution(CLASS0_SIZE, vp9_mv_class0_encodings, vp9_mv_class0_tree, prob->comps[i].class0, branch_ct_class0[i], - NMVcount->comps[i].class0, - 256, 1); + NMVcount->comps[i].class0); for (j = 0; j < MV_OFFSET_BITS; ++j) { - prob->comps[i].bits[j] = vp9_bin_prob_from_distribution( - NMVcount->comps[i].bits[j]); + prob->comps[i].bits[j] = get_binary_prob(NMVcount->comps[i].bits[j][0], + NMVcount->comps[i].bits[j][1]); branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0]; branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1]; } @@ -286,26 +279,25 @@ void vp9_counts_to_nmv_context( vp9_mv_fp_tree, prob->comps[i].class0_fp[k], branch_ct_class0_fp[i][k], - NMVcount->comps[i].class0_fp[k], - 256, 1); + NMVcount->comps[i].class0_fp[k]); } vp9_tree_probs_from_distribution(4, vp9_mv_fp_encodings, vp9_mv_fp_tree, prob->comps[i].fp, branch_ct_fp[i], - NMVcount->comps[i].fp, - 256, 1); + NMVcount->comps[i].fp); } if (usehp) { for (i = 0; i < 2; ++i) { - prob->comps[i].class0_hp = vp9_bin_prob_from_distribution( - NMVcount->comps[i].class0_hp); + prob->comps[i].class0_hp = + get_binary_prob(NMVcount->comps[i].class0_hp[0], + NMVcount->comps[i].class0_hp[1]); branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0]; branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1]; - prob->comps[i].hp = - vp9_bin_prob_from_distribution(NMVcount->comps[i].hp); + prob->comps[i].hp = get_binary_prob(NMVcount->comps[i].hp[0], + NMVcount->comps[i].hp[1]); branch_ct_hp[i][0] = NMVcount->comps[i].hp[0]; branch_ct_hp[i][1] = NMVcount->comps[i].hp[1]; } diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 18217e627..710b3917e 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_filter.h" #include "vpx_ports/mem.h" #include "vp9_rtcd.h" +#include "vp9/common/vp9_common.h" DECLARE_ALIGNED(16, const short, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = { { 128, 0 }, @@ -148,11 +149,11 @@ static void filter_block2d_first_pass_6(unsigned char *src_ptr, unsigned int output_width, const short *vp9_filter) { unsigned int i, j; - int Temp; + int temp; for (i = 0; i < output_height; i++) { for (j = 0; j < output_width; j++) { - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) + + temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) + ((int)src_ptr[0] * vp9_filter[2]) + ((int)src_ptr[pixel_step] * vp9_filter[3]) + @@ -161,14 +162,7 @@ static void filter_block2d_first_pass_6(unsigned char *src_ptr, (VP9_FILTER_WEIGHT >> 1); /* Rounding */ /* Normalize back to 0-255 */ - Temp = Temp >> VP9_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - 
Temp = 255; - - output_ptr[j] = Temp; + output_ptr[j] = clip_pixel(temp >> VP9_FILTER_SHIFT); src_ptr++; } @@ -187,12 +181,12 @@ static void filter_block2d_second_pass_6(int *src_ptr, unsigned int output_width, const short *vp9_filter) { unsigned int i, j; - int Temp; + int temp; for (i = 0; i < output_height; i++) { for (j = 0; j < output_width; j++) { /* Apply filter */ - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) + + temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) + ((int)src_ptr[0] * vp9_filter[2]) + ((int)src_ptr[pixel_step] * vp9_filter[3]) + @@ -201,14 +195,7 @@ static void filter_block2d_second_pass_6(int *src_ptr, (VP9_FILTER_WEIGHT >> 1); /* Rounding */ /* Normalize back to 0-255 */ - Temp = Temp >> VP9_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = (unsigned char)Temp; + output_ptr[j] = clip_pixel(temp >> VP9_FILTER_SHIFT); src_ptr++; } @@ -235,12 +222,12 @@ static void filter_block2d_second_pass_avg_6(int *src_ptr, unsigned int output_width, const short *vp9_filter) { unsigned int i, j; - int Temp; + int temp; for (i = 0; i < output_height; i++) { for (j = 0; j < output_width; j++) { /* Apply filter */ - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) + + temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) + ((int)src_ptr[0] * vp9_filter[2]) + ((int)src_ptr[pixel_step] * vp9_filter[3]) + @@ -249,14 +236,8 @@ static void filter_block2d_second_pass_avg_6(int *src_ptr, (VP9_FILTER_WEIGHT >> 1); /* Rounding */ /* Normalize back to 0-255 */ - Temp = Temp >> VP9_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = (unsigned char)((output_ptr[j] + Temp + 1) >> 1); + output_ptr[j] = (clip_pixel(temp >> VP9_FILTER_SHIFT) + + output_ptr[j] + 1) >> 1; src_ptr++; } @@ -539,14 +520,8 @@ static void filter_block2d_8_c(const unsigned char *src_ptr, (VP9_FILTER_WEIGHT >> 1); // Rounding // Normalize back to 0-255... - temp >>= VP9_FILTER_SHIFT; - if (temp < 0) { - temp = 0; - } else if (temp > 255) { - temp = 255; - } + *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); src_ptr++; - *output_ptr = temp; output_ptr += intermediate_height; } src_ptr += src_next_row_stride; @@ -573,15 +548,8 @@ static void filter_block2d_8_c(const unsigned char *src_ptr, (VP9_FILTER_WEIGHT >> 1); // Rounding // Normalize back to 0-255... 
- temp >>= VP9_FILTER_SHIFT; - if (temp < 0) { - temp = 0; - } else if (temp > 255) { - temp = 255; - } - + *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); src_ptr += intermediate_height; - *dst_ptr++ = (unsigned char)temp; } src_ptr += intermediate_next_stride; dst_ptr += dst_next_row_stride; @@ -940,15 +908,15 @@ static void filter_block2d_bil_second_pass(unsigned short *src_ptr, unsigned int width, const short *vp9_filter) { unsigned int i, j; - int Temp; + int temp; for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { /* Apply filter */ - Temp = ((int)src_ptr[0] * vp9_filter[0]) + + temp = ((int)src_ptr[0] * vp9_filter[0]) + ((int)src_ptr[width] * vp9_filter[1]) + (VP9_FILTER_WEIGHT / 2); - dst_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT); + dst_ptr[j] = (unsigned int)(temp >> VP9_FILTER_SHIFT); src_ptr++; } @@ -973,15 +941,15 @@ static void filter_block2d_bil_second_pass_avg(unsigned short *src_ptr, unsigned int width, const short *vp9_filter) { unsigned int i, j; - int Temp; + int temp; for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { /* Apply filter */ - Temp = ((int)src_ptr[0] * vp9_filter[0]) + - ((int)src_ptr[width] * vp9_filter[1]) + - (VP9_FILTER_WEIGHT / 2); - dst_ptr[j] = (unsigned int)(((Temp >> VP9_FILTER_SHIFT) + dst_ptr[j] + 1) >> 1); + temp = (((int)src_ptr[0] * vp9_filter[0]) + + ((int)src_ptr[width] * vp9_filter[1]) + + (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT; + dst_ptr[j] = (unsigned int)((temp + dst_ptr[j] + 1) >> 1); src_ptr++; } diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index cc685b99e..9074da22f 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -26,8 +26,8 @@ #include #include "vpx_ports/config.h" #include "vp9/common/vp9_systemdependent.h" - #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; @@ -562,15 +562,7 @@ void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - int a = a1 + pred_ptr[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]); } dst_ptr += stride; @@ -765,14 +757,7 @@ void vp9_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr, for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - int a = tmp[r * 4 + c] + pred_ptr[c]; - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(tmp[r * 4 + c] + pred_ptr[c]); } dst_ptr += stride; @@ -792,15 +777,7 @@ void vp9_dc_only_idct_add_8x8_c(short input_dc, for (b = 0; b < 4; b++) { for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - int a = a1 + pred_ptr[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]); } dst_ptr += stride; diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index 2abae34b2..e8a3c4f5e 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -11,6 +11,7 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_treecoder.h" // TBD prediction functions for various bitstream signals @@ -383,26 +384,13 @@ void vp9_calc_ref_probs(int *count, vp9_prob *probs) { int tot_count; tot_count = count[0] + count[1] + count[2] + count[3]; - if (tot_count) { - probs[0] = (vp9_prob)((count[0] * 255 + (tot_count >> 1)) / tot_count); - probs[0] 
+= !probs[0]; - } else - probs[0] = 128; + probs[0] = get_prob(count[0], tot_count); tot_count -= count[0]; - if (tot_count) { - probs[1] = (vp9_prob)((count[1] * 255 + (tot_count >> 1)) / tot_count); - probs[1] += !probs[1]; - } else - probs[1] = 128; + probs[1] = get_prob(count[1], tot_count); tot_count -= count[1]; - if (tot_count) { - probs[2] = (vp9_prob)((count[2] * 255 + (tot_count >> 1)) / tot_count); - probs[2] += !probs[2]; - } else - probs[2] = 128; - + probs[2] = get_prob(count[2], tot_count); } // Computes a set of modified conditional probabilities for the reference frame diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index e567bac8d..d8bcd77c0 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -24,15 +24,7 @@ void vp9_recon_b_c for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - int a = diff_ptr[c] + pred_ptr[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); } dst_ptr += stride; @@ -52,15 +44,7 @@ void vp9_recon_uv_b_c for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - int a = diff_ptr[c] + pred_ptr[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); } dst_ptr += stride; @@ -79,15 +63,7 @@ void vp9_recon4b_c for (r = 0; r < 4; r++) { for (c = 0; c < 16; c++) { - int a = diff_ptr[c] + pred_ptr[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); } dst_ptr += stride; @@ -107,15 +83,7 @@ void vp9_recon2b_c for (r = 0; r < 4; r++) { for (c = 0; c < 8; c++) { - int a = diff_ptr[c] + pred_ptr[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a; + dst_ptr[c] = clip_pixel(diff_ptr[c] + pred_ptr[c]); } dst_ptr += stride; @@ -133,12 +101,7 @@ void vp9_recon_mby_s_c(MACROBLOCKD *xd, uint8_t *dst) { for (y = 0; y < 16; y++) { for (x = 0; x < 16; x++) { - int a = dst[x] + diff[x]; - if (a < 0) - a = 0; - else if (a > 255) - a = 255; - dst[x] = a; + dst[x] = clip_pixel(dst[x] + diff[x]); } dst += stride; diff += 16; @@ -156,12 +119,7 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { for (y = 0; y < 8; y++) { for (x = 0; x < 8; x++) { - int a = dst[x] + diff[x]; - if (a < 0) - a = 0; - else if (a > 255) - a = 255; - dst[x] = a; + dst[x] = clip_pixel(dst[x] + diff[x]); } dst += stride; diff += 8; @@ -176,12 +134,7 @@ void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { for (y = 0; y < 32; y++) { for (x = 0; x < 32; x++) { - int a = dst[x] + diff[x]; - if (a < 0) - a = 0; - else if (a > 255) - a = 255; - dst[x] = a; + dst[x] = clip_pixel(dst[x] + diff[x]); } dst += stride; diff += 32; @@ -195,18 +148,8 @@ void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { for (y = 0; y < 16; y++) { for (x = 0; x < 16; x++) { - int u = udst[x] + udiff[x]; - int v = vdst[x] + vdiff[x]; - if (u < 0) - u = 0; - else if (u > 255) - u = 255; - if (v < 0) - v = 0; - else if (v > 255) - v = 255; - udst[x] = u; - vdst[x] = v; + udst[x] = clip_pixel(udst[x] + udiff[x]); + vdst[x] = clip_pixel(vdst[x] + vdiff[x]); } udst += stride; vdst += stride; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index fb4906e6a..11d1c97b1 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -332,8 +332,8 @@ void filter_mb(unsigned char *src, int src_stride, unsigned char *dst, int 
dst_stride, int width, int height) { int i, j, k; - unsigned int Temp[32 * 32]; - unsigned int *pTmp = Temp; + unsigned int temp[32 * 32]; + unsigned int *pTmp = temp; unsigned char *pSrc = src - (1 + src_stride) * (PRED_FILT_LEN / 2); // Horizontal @@ -350,7 +350,7 @@ void filter_mb(unsigned char *src, int src_stride, } // Vertical - pTmp = Temp; + pTmp = temp; for (i = 0; i < width; i++) { unsigned char *pDst = dst + i; for (j = 0; j < height; j++) { @@ -358,8 +358,8 @@ void filter_mb(unsigned char *src, int src_stride, for (k = 0; k < PRED_FILT_LEN; k++) sum += pTmp[(j + k) * width] * pred_filter[k]; // Round - sum = (sum + ((1 << (filt_shift << 1)) >> 1)) >> (filt_shift << 1); - pDst[j * dst_stride] = (sum < 0 ? 0 : sum > 255 ? 255 : sum); + pDst[j * dst_stride] = (sum + ((1 << (filt_shift << 1)) >> 1)) >> + (filt_shift << 1); } ++pTmp; } diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 92492aa5b..14f94820d 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -124,18 +124,20 @@ static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n, } } for (c = 0; c <= r; ++c) { - int yabove_ext = yabove_row[r]; // 2*yabove_row[r] - yabove_row[r-1]; - int yleft_ext = yleft_col[r]; // 2*yleft_col[r] - yleft_col[r-1]; - yabove_ext = (yabove_ext > 255 ? 255 : (yabove_ext < 0 ? 0 : yabove_ext)); - yleft_ext = (yleft_ext > 255 ? 255 : (yleft_ext < 0 ? 0 : yleft_ext)); + int yabove_ext = yabove_row[r]; // clip_pixel(2 * yabove_row[r] - + // yabove_row[r - 1]); + int yleft_ext = yleft_col[r]; // clip_pixel(2 * yleft_col[r] - + // yleft_col[r-1]); ypred_ptr[(r - c) * y_stride + c] = (yabove_ext * (c + 1) + yleft_ext * (r - c + 1) + r / 2 + 1) / (r + 2); } for (r = 1; r < n; ++r) { - for (c = n - r; c < n; ++c) - ypred_ptr[r * y_stride + c] = (ypred_ptr[(r - 1) * y_stride + c] + - ypred_ptr[r * y_stride + c - 1] + 1) >> 1; + for (c = n - r; c < n; ++c) { + const int yabove_ext = ypred_ptr[(r - 1) * y_stride + c]; + const int yleft_ext = ypred_ptr[r * y_stride + c - 1]; + ypred_ptr[r * y_stride + c] = (yabove_ext + yleft_ext + 1) >> 1; + } } } @@ -321,15 +323,7 @@ void vp9_build_intra_predictors_internal(unsigned char *src, int src_stride, case TM_PRED: { for (r = 0; r < bsize; r++) { for (c = 0; c < bsize; c++) { - int pred = yleft_col[r] + yabove_row[ c] - ytop_left; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - ypred_ptr[c] = pred; + ypred_ptr[c] = clip_pixel(yleft_col[r] + yabove_row[c] - ytop_left); } ypred_ptr += y_stride; diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index f542acb3d..82338901a 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -156,14 +156,14 @@ void vp9_intra4x4_predict(BLOCKD *x, unsigned char *predictor) { int i, r, c; - unsigned char *Above = *(x->base_dst) + x->dst - x->dst_stride; - unsigned char Left[4]; - unsigned char top_left = Above[-1]; + unsigned char *above = *(x->base_dst) + x->dst - x->dst_stride; + unsigned char left[4]; + unsigned char top_left = above[-1]; - Left[0] = (*(x->base_dst))[x->dst - 1]; - Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride]; - Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride]; - Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride]; + left[0] = (*(x->base_dst))[x->dst - 1]; + left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride]; + left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride]; + left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride]; #if CONFIG_NEWBINTRAMODES 
if (b_mode == B_CONTEXT_PRED) @@ -175,8 +175,8 @@ void vp9_intra4x4_predict(BLOCKD *x, int expected_dc = 0; for (i = 0; i < 4; i++) { - expected_dc += Above[i]; - expected_dc += Left[i]; + expected_dc += above[i]; + expected_dc += left[i]; } expected_dc = (expected_dc + 4) >> 3; @@ -194,15 +194,7 @@ void vp9_intra4x4_predict(BLOCKD *x, /* prediction similar to true_motion prediction */ for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - int pred = Above[c] - top_left + Left[r]; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - predictor[c] = pred; + predictor[c] = clip_pixel(above[c] - top_left + left[r]); } predictor += 16; @@ -213,10 +205,10 @@ void vp9_intra4x4_predict(BLOCKD *x, case B_VE_PRED: { unsigned int ap[4]; - ap[0] = Above[0]; - ap[1] = Above[1]; - ap[2] = Above[2]; - ap[3] = Above[3]; + ap[0] = above[0]; + ap[1] = above[1]; + ap[2] = above[2]; + ap[3] = above[3]; for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { @@ -234,10 +226,10 @@ void vp9_intra4x4_predict(BLOCKD *x, case B_HE_PRED: { unsigned int lp[4]; - lp[0] = Left[0]; - lp[1] = Left[1]; - lp[2] = Left[2]; - lp[3] = Left[3]; + lp[0] = left[0]; + lp[1] = left[1]; + lp[2] = left[2]; + lp[3] = left[3]; for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { @@ -249,7 +241,7 @@ void vp9_intra4x4_predict(BLOCKD *x, } break; case B_LD_PRED: { - unsigned char *ptr = Above; + unsigned char *ptr = above; predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; predictor[0 * 16 + 1] = predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; @@ -273,15 +265,15 @@ void vp9_intra4x4_predict(BLOCKD *x, unsigned char pp[9]; - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; + pp[0] = left[3]; + pp[1] = left[2]; + pp[2] = left[1]; + pp[3] = left[0]; pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; + pp[5] = above[0]; + pp[6] = above[1]; + pp[7] = above[2]; + pp[8] = above[3]; predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; predictor[3 * 16 + 1] = @@ -306,15 +298,15 @@ void vp9_intra4x4_predict(BLOCKD *x, unsigned char pp[9]; - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; + pp[0] = left[3]; + pp[1] = left[2]; + pp[2] = left[1]; + pp[3] = left[0]; pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; + pp[5] = above[0]; + pp[6] = above[1]; + pp[7] = above[2]; + pp[8] = above[3]; predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; @@ -338,7 +330,7 @@ void vp9_intra4x4_predict(BLOCKD *x, break; case B_VL_PRED: { - unsigned char *pp = Above; + unsigned char *pp = above; predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; @@ -361,15 +353,15 @@ void vp9_intra4x4_predict(BLOCKD *x, case B_HD_PRED: { unsigned char pp[9]; - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; + pp[0] = left[3]; + pp[1] = left[2]; + pp[2] = left[1]; + pp[3] = left[0]; pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; + pp[5] = above[0]; + pp[6] = above[1]; + pp[7] = above[2]; + pp[8] = above[3]; predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; @@ -393,7 +385,7 @@ void vp9_intra4x4_predict(BLOCKD *x, case B_HU_PRED: { - unsigned char *pp = Left; + unsigned char *pp = left; predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; predictor[0 * 16 + 2] = @@ -418,7 +410,7 @@ void 
vp9_intra4x4_predict(BLOCKD *x, break; /* case B_CORNER_PRED: - corner_predictor(predictor, 16, 4, Above, Left); + corner_predictor(predictor, 16, 4, above, left); break; */ #endif diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c index 64018a100..fbc8a38cd 100644 --- a/vp9/common/vp9_treecoder.c +++ b/vp9/common/vp9_treecoder.c @@ -100,9 +100,7 @@ void vp9_tree_probs_from_distribution( vp9_tree tree, vp9_prob probs [ /* n-1 */ ], unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - unsigned int Pfac, - int rd + const unsigned int num_events[ /* n */ ] ) { const int tree_len = n - 1; int t = 0; @@ -110,29 +108,6 @@ void vp9_tree_probs_from_distribution( branch_counts(n, tok, tree, branch_ct, num_events); do { - const unsigned int *const c = branch_ct[t]; - const unsigned int tot = c[0] + c[1]; - -#if CONFIG_DEBUG - assert(tot < (1 << 24)); /* no overflow below */ -#endif - - if (tot) { - const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot; - probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */ - } else - probs[t] = vp9_prob_half; + probs[t] = get_binary_prob(branch_ct[t][0], branch_ct[t][1]); } while (++t < tree_len); } - -vp9_prob vp9_bin_prob_from_distribution(const unsigned int counts[2]) { - int tot_count = counts[0] + counts[1]; - vp9_prob prob; - if (tot_count) { - prob = (counts[0] * 255 + (tot_count >> 1)) / tot_count; - prob += !prob; - } else { - prob = 128; - } - return prob; -} diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h index bbf7e74d7..525b77dc8 100644 --- a/vp9/common/vp9_treecoder.h +++ b/vp9/common/vp9_treecoder.h @@ -12,6 +12,8 @@ #ifndef VP9_COMMON_VP9_TREECODER_H_ #define VP9_COMMON_VP9_TREECODER_H_ +#include "vpx/vpx_integer.h" + typedef unsigned char vp9_prob; #define vp9_prob_half ( (vp9_prob) 128) @@ -65,19 +67,24 @@ void vp9_tree_probs_from_distribution( vp9_tree tree, vp9_prob probs [ /* n-1 */ ], unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - unsigned int Pfactor, - int Round + const unsigned int num_events[ /* n */ ] ); -static __inline int clip_prob(int p) { - if (p > 255) - return 255; - else if (p < 1) - return 1; - return p; +static __inline vp9_prob clip_prob(int p) { + return (p > 255) ? 255u : (p < 1) ? 1u : p; } -vp9_prob vp9_bin_prob_from_distribution(const unsigned int counts[2]); +static __inline vp9_prob get_prob(int num, int den) { + return (den == 0) ? 
128u : clip_prob((num * 256 + (den >> 1)) / den); +} + +static __inline vp9_prob get_binary_prob(int n0, int n1) { + return get_prob(n0, n0 + n1); +} + +/* this function assumes prob1 and prob2 are already within [1,255] range */ +static __inline vp9_prob weighted_prob(int prob1, int prob2, int factor) { + return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8; +} #endif diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 22a66716f..9e382914b 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -13,6 +13,7 @@ #include "vp9/decoder/vp9_dequantize.h" #include "vpx_mem/vpx_mem.h" #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/common/vp9_common.h" static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride, int width, int height) { @@ -20,14 +21,7 @@ static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, for (r = 0; r < height; r++) { for (c = 0; c < width; c++) { - int a = diff[c] + pred[c]; - - if (a < 0) - a = 0; - else if (a > 255) - a = 255; - - dest[c] = (uint8_t) a; + dest[c] = clip_pixel(diff[c] + pred[c]); } dest += stride; @@ -43,14 +37,7 @@ static void add_constant_residual(const int16_t diff, const uint8_t *pred, for (r = 0; r < height; r++) { for (c = 0; c < width; c++) { - int a = diff + pred[c]; - - if (a < 0) - a = 0; - else if (a > 255) - a = 255; - - dest[c] = (uint8_t) a; + dest[c] = clip_pixel(diff + pred[c]); } dest += stride; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 09d0e9929..498f64752 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -30,6 +30,7 @@ #include "vp9/encoder/vp9_encodemv.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_mvref_common.h" +#include "vp9/common/vp9_treecoder.h" #if defined(SECTIONBITS_OUTPUT) unsigned __int64 Sectionbits[500]; @@ -110,11 +111,8 @@ static void update_mode( unsigned int new_b = 0, old_b = 0; int i = 0; - vp9_tree_probs_from_distribution( - n--, tok, tree, - Pnew, bct, num_events, - 256, 1 - ); + vp9_tree_probs_from_distribution(n--, tok, tree, + Pnew, bct, num_events); do { new_b += cost_branch(bct[i], Pnew[i]); @@ -155,18 +153,6 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi, } } -static int get_prob(int num, int den) { - int p; - if (den <= 0) - return 128; - p = (num * 255 + (den >> 1)) / den; - return clip_prob(p); -} - -static int get_binary_prob(int n0, int n1) { - return get_prob(n0, n0 + n1); -} - void vp9_update_skip_probs(VP9_COMP *cpi) { VP9_COMMON *const pc = &cpi->common; int k; @@ -187,7 +173,7 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, VP9_SWITCHABLE_FILTERS, vp9_switchable_interp_encodings, vp9_switchable_interp_tree, pc->fc.switchable_interp_prob[j], branch_ct, - cpi->switchable_interp_count[j], 256, 1); + cpi->switchable_interp_count[j]); for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) { if (pc->fc.switchable_interp_prob[j][i] < 1) pc->fc.switchable_interp_prob[j][i] = 1; @@ -257,13 +243,11 @@ static void update_mode_probs(VP9_COMMON *cm, for (i = 0; i < INTER_MODE_CONTEXTS; i++) { for (j = 0; j < 4; j++) { - int new_prob, count, old_cost, new_cost; + int new_prob, old_cost, new_cost; // Work out cost of coding branches with the old and optimal probability old_cost = cost_branch256(mv_ref_ct[i][j], mode_context[i][j]); - count = mv_ref_ct[i][j][0] + mv_ref_ct[i][j][1]; - new_prob = count > 0 ? (255 * mv_ref_ct[i][j][0]) / count : 128; - new_prob = (new_prob > 0) ? 
new_prob : 1; + new_prob = get_binary_prob(mv_ref_ct[i][j][0], mv_ref_ct[i][j][1]); new_cost = cost_branch256(mv_ref_ct[i][j], new_prob); // If cost saving is >= 14 bits then update the mode probability. @@ -1404,7 +1388,7 @@ static void build_tree_distribution(vp9_coeff_probs *coef_probs, vp9_coef_encodings, vp9_coef_tree, coef_probs[i][j][k], coef_branch_ct[i][j][k], - coef_counts[i][j][k], 256, 1); + coef_counts[i][j][k]); #ifdef ENTROPY_STATS if (!cpi->dummy_packing) for (t = 0; t < MAX_ENTROPY_TOKENS; ++t) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 04ee3f610..61ccc7ecb 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -2153,17 +2153,10 @@ void print_mode_context(void) { fprintf(f, " "); for (i = 0; i < 4; i++) { int this_prob; - int count; // context probs - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - if (count) - this_prob = 256 * mv_ref_ct[j][i][0] / count; - else - this_prob = 128; + this_prob = get_binary_prob(mv_ref_ct[j][i][0], mv_ref_ct[j][i][1]); - if (this_prob == 0) - this_prob = 1; fprintf(f, "%5d, ", this_prob); } fprintf(f, " },\n"); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index bdc39c1af..07a3a0444 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -216,7 +216,7 @@ static void set_mvcost(MACROBLOCK *mb) { static void init_base_skip_probs(void) { int i; double q; - int skip_prob, t; + int t; for (i = 0; i < QINDEX_RANGE; i++) { q = vp9_convert_qindex_to_q(i); @@ -225,26 +225,9 @@ static void init_base_skip_probs(void) { // Based on crude best fit of old table. t = (int)(564.25 * pow(2.71828, (-0.012 * q))); - skip_prob = t; - if (skip_prob < 1) - skip_prob = 1; - else if (skip_prob > 255) - skip_prob = 255; - base_skip_false_prob[i][1] = skip_prob; - - skip_prob = t * 3 / 4; - if (skip_prob < 1) - skip_prob = 1; - else if (skip_prob > 255) - skip_prob = 255; - base_skip_false_prob[i][2] = skip_prob; - - skip_prob = t * 5 / 4; - if (skip_prob < 1) - skip_prob = 1; - else if (skip_prob > 255) - skip_prob = 255; - base_skip_false_prob[i][0] = skip_prob; + base_skip_false_prob[i][1] = clip_prob(t); + base_skip_false_prob[i][2] = clip_prob(t * 3 / 4); + base_skip_false_prob[i][0] = clip_prob(t * 5 / 4); } } @@ -2875,23 +2858,11 @@ void select_pred_filter_mode(VP9_COMP *cpi) { void update_pred_filt_prob(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; - int prob_pred_filter_off; // Based on the selection in the previous frame determine what mode // to use for the current frame and work out the signaling probability - if (cpi->pred_filter_on_count + cpi->pred_filter_off_count) { - prob_pred_filter_off = cpi->pred_filter_off_count * 256 / - (cpi->pred_filter_on_count + cpi->pred_filter_off_count); - - if (prob_pred_filter_off < 1) - prob_pred_filter_off = 1; - - if (prob_pred_filter_off > 255) - prob_pred_filter_off = 255; - - cm->prob_pred_filter_off = prob_pred_filter_off; - } else - cm->prob_pred_filter_off = 128; + cm->prob_pred_filter_off = get_binary_prob(cpi->pred_filter_off_count, + cpi->pred_filter_on_count); /* { FILE *fp = fopen("filt_use.txt", "a"); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9b87713f9..9265a0237 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3045,7 +3045,6 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int int pred_flag; int pred_ctx; int i; - int tot_count; vp9_prob pred_prob, new_pred_prob; int seg_ref_active; @@ -3069,13 +3068,8 @@ static void 
estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int
 
   // Predict probability for current frame based on stats so far
   pred_ctx = vp9_get_pred_context(cm, xd, PRED_REF);
-  tot_count = cpi->ref_pred_count[pred_ctx][0] + cpi->ref_pred_count[pred_ctx][1];
-  if (tot_count) {
-    new_pred_prob =
-      (cpi->ref_pred_count[pred_ctx][0] * 255 + (tot_count >> 1)) / tot_count;
-    new_pred_prob += !new_pred_prob;
-  } else
-    new_pred_prob = 128;
+  new_pred_prob = get_binary_prob(cpi->ref_pred_count[pred_ctx][0],
+                                  cpi->ref_pred_count[pred_ctx][1]);
 
   // Get the set of probabilities to use if prediction fails
   mod_refprobs = cm->mod_refprobs[pred_ref];
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 5e8d94c8c..ee90f4fc3 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -107,31 +107,15 @@ static void calc_segtree_probs(MACROBLOCKD *xd,
                                int *segcounts,
                                vp9_prob *segment_tree_probs) {
   int count1, count2;
-  int tot_count;
-  int i;
-
-  // Blank the strtucture to start with
-  vpx_memset(segment_tree_probs, 0,
-             MB_FEATURE_TREE_PROBS * sizeof(*segment_tree_probs));
 
   // Total count for all segments
   count1 = segcounts[0] + segcounts[1];
   count2 = segcounts[2] + segcounts[3];
-  tot_count = count1 + count2;
 
   // Work out probabilities of each segment
-  if (tot_count)
-    segment_tree_probs[0] = (count1 * 255) / tot_count;
-  if (count1 > 0)
-    segment_tree_probs[1] = (segcounts[0] * 255) / count1;
-  if (count2 > 0)
-    segment_tree_probs[2] = (segcounts[2] * 255) / count2;
-
-  // Clamp probabilities to minimum allowed value
-  for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
-    if (segment_tree_probs[i] == 0)
-      segment_tree_probs[i] = 1;
-  }
+  segment_tree_probs[0] = get_binary_prob(count1, count2);
+  segment_tree_probs[1] = get_prob(segcounts[0], count1);
+  segment_tree_probs[2] = get_prob(segcounts[2], count2);
 }
 
 // Based on set of segment counts and probabilities calculate a cost estimate
@@ -165,7 +149,6 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
   int i;
-  int tot_count;
   int no_pred_cost;
   int t_pred_cost = INT_MAX;
   int pred_context;
@@ -297,20 +280,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
 
     // Add in the cost of the signalling for each prediction context
    for (i = 0; i < PREDICTION_PROBS; i++) {
-      tot_count = temporal_predictor_count[i][0] +
-                  temporal_predictor_count[i][1];
-
-      // Work out the context probabilities for the segment
-      // prediction flag
-      if (tot_count) {
-        t_nopred_prob[i] = (temporal_predictor_count[i][0] * 255) /
-                           tot_count;
-
-        // Clamp to minimum allowed value
-        if (t_nopred_prob[i] < 1)
-          t_nopred_prob[i] = 1;
-      } else
-        t_nopred_prob[i] = 1;
+      t_nopred_prob[i] = get_binary_prob(temporal_predictor_count[i][0],
+                                         temporal_predictor_count[i][1]);
 
       // Add in the predictor signaling cost
       t_pred_cost += (temporal_predictor_count[i][0] *
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 9a0e8f3d9..bcc3b1f81 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -627,8 +627,7 @@ static void print_probs(FILE *f, vp9_coeff_accum *context_counters,
         coef_counts[t] = context_counters[type][band][pt][t];
       vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
                                        vp9_coef_encodings, vp9_coef_tree,
-                                       coef_probs, branch_ct, coef_counts,
-                                       256, 1);
+                                       coef_probs, branch_ct, coef_counts);
 
       fprintf(f, "%s\n {", Comma(pt));
       t = 0;
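For reference, the count-to-probability helpers that this patch routes every
call site through (the vp9_treecoder.h hunk above) can be exercised on their
own. The sketch below mirrors those helpers for illustration; the main()
driver and its sample counts are not part of the patch.

#include <stdio.h>

typedef unsigned char vp9_prob;

static vp9_prob clip_prob(int p) {
  /* Coding probabilities must stay in [1, 255]; 0 and 256 are not
   * representable in an 8-bit probability. */
  return (p > 255) ? 255u : (p < 1) ? 1u : (vp9_prob)p;
}

static vp9_prob get_prob(int num, int den) {
  /* Rounded num/den scaled to 256, falling back to 128 (a flat 1/2)
   * when there is no evidence at all. */
  return (den == 0) ? 128u : clip_prob((num * 256 + (den >> 1)) / den);
}

static vp9_prob get_binary_prob(int n0, int n1) {
  return get_prob(n0, n0 + n1);
}

int main(void) {
  printf("%d\n", get_binary_prob(30, 10)); /* 30 zeros vs 10 ones -> 192 */
  printf("%d\n", get_binary_prob(0, 50));  /* would round to 0, clips to 1 */
  printf("%d\n", get_binary_prob(0, 0));   /* no observations -> 128 */
  return 0;
}

With counts (30, 10) this prints 192, i.e. roughly 0.75 * 256; an all-zero
numerator clips to the minimum legal probability of 1 rather than 0; and an
empty context falls back to 128. These are exactly the clamping and fallback
conventions that the refactored call sites above previously open-coded.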
Bultje" Date: Wed, 12 Dec 2012 10:25:58 -0800 Subject: [PATCH 18/77] New default coefficient/band probabilities. Gives 0.5-0.6% improvement on derf and stdhd, and 1.1% on hd. The old tables basically derive from times that we had only 4x4 or only 4x4 and 8x8 DCTs. Note that some values are filled with 128, because e.g. ADST ever only occurs as Y-with-DC, as does 32x32; 16x16 ever only occurs as Y-with-DC or as UV (as complement of 32x32 Y); and 8x8 Y2 ever only has 4 coefficients max. If preferred, I can add values of other tables in their place (e.g. use 4x4 2nd order high-frequency probabilities for 8x8 2nd order), so that they make at least some sense if we ever implement a larger 2nd order transform for the 8x8 DCT (etc.), please let me know Change-Id: I917db356f2aff8865f528eb873c56ef43aa5ce22 --- vp9/common/vp9_default_coef_probs.h | 2443 +++++++++++++-------------- vp9/encoder/vp9_tokenize.c | 33 +- 2 files changed, 1167 insertions(+), 1309 deletions(-) diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 1255fce5e..afd517064 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -13,1346 +13,1201 @@ static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES_4X4] = { - { - /* Block Type ( 0 ) */ - { - /* Coeff Band ( 0 )*/ + { /* block Type 0 */ + { /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, - { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, - { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, - { 90, 116, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, - { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, - { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, - { 64, 128, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, - { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, - { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, - { 64, 100, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, - { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, - { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, - { 28, 110, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, - { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, - { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, - { 90, 90, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, - { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, - { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, - { 64, 120, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 224, 180, 254, 255, 234, 224, 255, 227, 128, 128, 128 }, + { 187, 178, 250, 255, 226, 218, 255, 229, 255, 255, 128 }, + { 145, 171, 243, 253, 219, 211, 254, 226, 255, 224, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 1, 187, 252, 255, 231, 220, 255, 229, 255, 255, 128 }, + { 129, 174, 244, 254, 225, 216, 253, 219, 255, 255, 128 }, + { 16, 131, 193, 251, 205, 205, 254, 222, 255, 255, 128 }, + { 2, 93, 136, 236, 159, 179, 255, 197, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 1, 188, 254, 255, 241, 236, 254, 220, 255, 255, 128 }, + { 133, 165, 249, 255, 236, 220, 252, 220, 255, 255, 128 }, + { 20, 112, 203, 254, 217, 214, 255, 224, 255, 255, 128 }, + { 4, 61, 106, 240, 155, 189, 252, 202, 255, 255, 128 } + }, { /* Coeff Band 4 */ + { 1, 168, 252, 255, 239, 228, 253, 217, 255, 255, 128 }, + { 158, 163, 247, 255, 231, 221, 255, 242, 128, 128, 128 }, + { 23, 127, 205, 253, 212, 224, 255, 234, 255, 255, 128 }, + { 2, 83, 141, 237, 176, 210, 245, 207, 255, 255, 128 } + }, { /* Coeff Band 5 */ + { 1, 233, 254, 255, 243, 241, 255, 213, 128, 128, 128 }, + { 155, 213, 253, 255, 240, 221, 216, 112, 255, 255, 128 }, + { 41, 159, 237, 254, 229, 216, 255, 161, 128, 128, 128 }, + { 11, 95, 176, 244, 194, 191, 255, 167, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 1, 160, 253, 255, 238, 231, 255, 230, 255, 255, 128 }, + { 174, 152, 248, 255, 230, 223, 255, 223, 255, 255, 128 }, + { 86, 125, 213, 253, 207, 207, 254, 224, 255, 171, 128 }, + { 39, 89, 156, 240, 168, 190, 251, 181, 255, 255, 128 } + }, { /* Coeff Band 7 */ + { 1, 101, 255, 255, 243, 244, 255, 255, 128, 128, 128 }, + { 230, 66, 255, 255, 238, 238, 128, 128, 128, 128, 128 }, + { 151, 92, 229, 255, 224, 197, 128, 128, 128, 128, 128 }, + { 109, 57, 171, 255, 73, 255, 128, 128, 128, 128, 128 } } - }, - { - /* Block Type ( 1 ) */ - { - /* Coeff Band ( 0 )*/ - { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, - { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, - { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, - { 48, 32, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, - { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, - { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, - { 66, 90, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, - { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, - { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, - { 18, 80, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, - { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, - { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, - { 36, 120, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, - { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, - { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, - { 18, 90, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, - { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, - { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, - { 28, 70, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, - }, - { - /* 
Coeff Band ( 6 )*/ - { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, - { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, - { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, - { 40, 90, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, - { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + }, { /* block Type 1 */ + { /* Coeff Band 0 */ + { 148, 109, 219, 239, 203, 184, 222, 172, 238, 203, 192 }, + { 101, 110, 206, 229, 181, 178, 224, 171, 250, 206, 180 }, + { 67, 108, 186, 222, 172, 174, 216, 167, 246, 195, 221 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 184, 249, 254, 226, 220, 253, 241, 255, 255, 128 }, + { 84, 182, 244, 254, 222, 218, 254, 217, 255, 255, 128 }, + { 56, 147, 210, 252, 208, 210, 253, 218, 255, 255, 128 }, + { 32, 124, 170, 233, 165, 178, 249, 196, 255, 253, 128 } + }, { /* Coeff Band 2 */ + { 1, 182, 242, 245, 208, 194, 239, 179, 255, 238, 128 }, + { 28, 170, 230, 241, 202, 192, 243, 171, 255, 243, 128 }, + { 16, 109, 165, 231, 182, 184, 237, 168, 255, 249, 255 }, + { 2, 76, 113, 202, 141, 172, 221, 160, 252, 227, 255 } + }, { /* Coeff Band 3 */ + { 1, 195, 249, 254, 230, 239, 251, 211, 255, 255, 128 }, + { 39, 164, 242, 254, 224, 222, 255, 235, 255, 255, 128 }, + { 16, 111, 179, 251, 204, 197, 251, 234, 255, 209, 128 }, + { 3, 84, 130, 225, 155, 176, 226, 196, 255, 238, 128 } + }, { /* Coeff Band 4 */ + { 1, 180, 248, 254, 227, 219, 254, 211, 255, 255, 128 }, + { 38, 170, 242, 253, 222, 214, 254, 242, 255, 255, 128 }, + { 5, 111, 176, 250, 204, 197, 255, 208, 128, 128, 128 }, + { 1, 75, 120, 233, 146, 186, 250, 203, 255, 255, 128 } + }, { /* Coeff Band 5 */ + { 1, 183, 251, 255, 232, 223, 252, 229, 255, 255, 128 }, + { 51, 158, 245, 255, 230, 224, 255, 239, 128, 128, 128 }, + { 13, 80, 158, 253, 206, 216, 255, 233, 128, 128, 128 }, + { 4, 39, 76, 212, 107, 153, 252, 206, 255, 255, 128 } + }, { /* Coeff Band 6 */ + { 1, 181, 252, 254, 231, 214, 242, 225, 255, 236, 128 }, + { 81, 167, 247, 254, 229, 217, 252, 226, 255, 255, 128 }, + { 20, 122, 195, 253, 213, 212, 249, 211, 255, 238, 128 }, + { 18, 100, 153, 231, 158, 182, 244, 203, 255, 219, 128 } + }, { /* Coeff Band 7 */ + { 1, 100, 254, 255, 242, 246, 255, 230, 128, 128, 128 }, + { 177, 62, 250, 255, 246, 210, 255, 255, 128, 128, 128 }, + { 65, 58, 186, 255, 227, 241, 255, 219, 128, 128, 128 }, + { 45, 23, 118, 244, 162, 208, 255, 228, 128, 128, 128 } } - }, - { - /* Block Type ( 2 ) */ - { - /* Coeff Band ( 0 )*/ - { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, - { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, - { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, - { 64, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, - { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, - { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, - { 140, 70, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, - { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, - { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, - { 60, 40, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 191, 251, 255, 255, 
128, 128, 128, 128, 128, 128 }, - { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 132, 118, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, - { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 48, 85, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, { /* block Type 2 */ + { /* Coeff Band 0 */ + { 242, 73, 238, 244, 198, 192, 241, 189, 253, 226, 247 }, + { 171, 70, 204, 231, 180, 183, 228, 172, 247, 215, 221 }, + { 73, 62, 144, 202, 153, 169, 207, 153, 245, 199, 230 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 163, 241, 245, 201, 192, 243, 191, 255, 229, 255 }, + { 165, 147, 230, 245, 201, 193, 244, 193, 255, 231, 255 }, + { 76, 109, 191, 243, 190, 193, 243, 192, 255, 231, 255 }, + { 22, 63, 111, 202, 138, 164, 225, 164, 252, 218, 248 } + }, { /* Coeff Band 2 */ + { 1, 113, 225, 245, 201, 195, 238, 185, 254, 225, 255 }, + { 122, 105, 195, 236, 183, 186, 235, 180, 254, 227, 252 }, + { 38, 79, 135, 217, 154, 172, 229, 171, 253, 220, 250 }, + { 9, 53, 78, 161, 121, 151, 202, 141, 251, 207, 244 } + }, { /* Coeff Band 3 */ + { 1, 150, 238, 250, 213, 202, 244, 194, 255, 236, 255 }, + { 140, 132, 223, 247, 204, 199, 243, 193, 255, 234, 255 }, + { 51, 101, 182, 240, 188, 189, 240, 186, 255, 232, 255 }, + { 6, 59, 100, 201, 137, 165, 225, 161, 252, 221, 249 } + }, { /* Coeff Band 4 */ + { 1, 151, 233, 248, 205, 199, 248, 196, 255, 243, 255 }, + { 133, 140, 214, 244, 193, 193, 245, 194, 255, 236, 255 }, + { 27, 104, 168, 235, 172, 183, 243, 187, 254, 235, 255 }, + { 2, 61, 101, 202, 135, 164, 229, 167, 254, 223, 255 } + }, { /* Coeff Band 5 */ + { 1, 227, 246, 254, 225, 215, 254, 217, 255, 255, 128 }, + { 132, 195, 239, 253, 219, 210, 252, 212, 255, 255, 128 }, + { 49, 143, 214, 251, 207, 204, 253, 212, 255, 238, 128 }, + { 11, 93, 151, 235, 169, 185, 247, 190, 255, 238, 128 } + }, { /* Coeff Band 6 */ + { 1, 143, 237, 251, 213, 203, 249, 203, 255, 243, 128 }, + { 137, 120, 216, 246, 198, 196, 248, 199, 255, 240, 255 }, + { 50, 94, 166, 233, 169, 181, 245, 189, 255, 240, 255 }, + { 9, 56, 97, 190, 129, 158, 228, 159, 255, 226, 255 } + }, { /* Coeff Band 7 */ + { 1, 96, 245, 254, 229, 216, 255, 212, 255, 255, 128 }, + { 179, 81, 234, 253, 217, 209, 255, 230, 255, 255, 128 }, + { 105, 56, 192, 248, 192, 197, 252, 212, 255, 205, 128 }, + { 53, 32, 133, 228, 151, 177, 250, 192, 255, 255, 128 } } - }, - { - /* Block Type ( 3 ) */ - { - /* Coeff Band ( 0 )*/ - { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 
}, - { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, - { 63, 48, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, - { 54, 40, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, - { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, - { 44, 84, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, - { 32, 70, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, - { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, - { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, - { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, - { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, - { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, - { 26, 104, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, - { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, - { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, - { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, - { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, - { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, - { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, - { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, - { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, - { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 209, 89, 216, 242, 191, 190, 245, 191, 240, 235, 168 }, + { 142, 96, 196, 229, 173, 180, 233, 175, 247, 220, 174 }, + { 66, 89, 157, 205, 155, 171, 209, 156, 243, 200, 197 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 159, 235, 246, 202, 197, 237, 186, 248, 223, 223 }, + { 96, 137, 223, 247, 203, 198, 242, 188, 241, 202, 209 }, + { 22, 95, 167, 243, 184, 196, 237, 187, 247, 221, 221 }, + { 3, 51, 81, 192, 125, 158, 220, 164, 242, 211, 197 } + }, { /* Coeff Band 2 */ + { 1, 145, 226, 244, 196, 194, 240, 191, 247, 225, 233 }, + { 66, 127, 203, 240, 188, 189, 239, 188, 248, 225, 220 }, + { 9, 83, 136, 224, 159, 176, 235, 177, 247, 223, 207 }, + { 2, 46, 71, 169, 121, 152, 210, 149, 241, 212, 199 } + }, { /* Coeff Band 3 */ + { 1, 174, 238, 249, 209, 201, 245, 198, 241, 196, 241 }, + { 76, 151, 223, 247, 203, 197, 245, 194, 243, 202, 198 }, + { 12, 102, 170, 240, 183, 187, 242, 191, 247, 225, 209 }, + { 1, 52, 85, 202, 135, 162, 225, 168, 240, 209, 221 } + }, { /* Coeff Band 4 */ + { 1, 140, 230, 247, 204, 198, 242, 190, 249, 209, 248 }, + { 94, 126, 213, 244, 195, 194, 240, 190, 247, 210, 237 }, + { 13, 95, 159, 232, 171, 181, 237, 179, 245, 205, 237 }, + { 1, 51, 83, 186, 128, 158, 216, 154, 240, 193, 229 } + }, { /* Coeff Band 5 */ + { 1, 218, 244, 251, 214, 202, 243, 199, 253, 214, 255 }, + { 91, 194, 238, 249, 210, 200, 247, 203, 251, 223, 255 }, + { 18, 
140, 207, 247, 198, 194, 246, 203, 252, 213, 255 }, + { 3, 76, 126, 223, 156, 172, 233, 185, 251, 206, 255 } + }, { /* Coeff Band 6 */ + { 1, 135, 235, 250, 210, 203, 246, 206, 251, 219, 241 }, + { 105, 120, 214, 246, 196, 196, 245, 195, 250, 216, 243 }, + { 24, 91, 154, 231, 166, 180, 241, 183, 250, 214, 242 }, + { 3, 53, 84, 183, 127, 157, 218, 153, 244, 195, 237 } + }, { /* Coeff Band 7 */ + { 1, 83, 246, 252, 215, 208, 246, 206, 255, 237, 128 }, + { 184, 61, 233, 250, 208, 204, 245, 198, 254, 227, 255 }, + { 83, 58, 190, 246, 189, 195, 244, 198, 255, 229, 128 }, + { 41, 38, 125, 214, 144, 169, 229, 171, 251, 216, 255 } } } }; - static const vp9_coeff_probs default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4] = { - { - /* Block Type ( 0 ) */ - { - /* Coeff Band ( 0 )*/ + { /* block Type 0 */ + { /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, - { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, - { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, - { 90, 116, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, - { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, - { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, - { 64, 128, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, - { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, - { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, - { 64, 100, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, - { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, - { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, - { 28, 110, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, - { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, - { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, - { 90, 90, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, - { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, - { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, - { 64, 120, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } - }, - { - /* Block Type ( 1 ) */ - { - /* Coeff Band ( 0 )*/ - { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, - { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, - { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, - { 48, 32, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, - { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, - { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, - { 66, 90, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, - { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, - { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, - { 18, 80, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, - { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, - { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, - { 36, 120, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, - { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, - { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, - { 18, 90, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, - { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, - { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, - { 28, 70, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, - { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, - { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, - { 40, 90, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, - { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + }, { /* block Type 1 */ + { /* Coeff Band 0 */ + { 128, 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } - }, - { - /* Block Type ( 2 ) */ - { - /* Coeff Band ( 0 )*/ - { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, - { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, - { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, - { 64, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, - { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, - { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, - { 140, 70, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, - { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, - { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, - { 60, 40, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, - }, - { - /* Coeff Band ( 3 )*/ - { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 132, 118, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, - { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 226, 255, 
128, 128, 128, 128, 128, 128, 128, 128 }, - { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 48, 85, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ + }, { /* block Type 2 */ + { /* Coeff Band 0 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } - }, - { - /* Block Type ( 3 ) */ - { - /* Coeff Band ( 0 )*/ - { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, - { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, - { 63, 48, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, - { 54, 40, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, - }, - { - /* Coeff Band ( 1 )*/ - { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, - { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, - { 44, 84, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, - { 32, 70, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, - }, - { - /* Coeff Band ( 2 )*/ - { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, - { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, - { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, - { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, - }, - { - /* 
Coeff Band ( 3 )*/ - { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, - { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, - { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, - { 26, 104, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, - }, - { - /* Coeff Band ( 4 )*/ - { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, - { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, - { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, - { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, - }, - { - /* Coeff Band ( 5 )*/ - { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, - { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, - { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, - { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, - }, - { - /* Coeff Band ( 6 )*/ - { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, - { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, - { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, - { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, - }, - { - /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 191, 34, 178, 193, 160, 173, 196, 142, 247, 191, 244 }, + { 84, 45, 129, 187, 145, 170, 189, 145, 240, 186, 212 }, + { 14, 36, 69, 149, 120, 154, 177, 136, 231, 177, 196 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 76, 169, 226, 167, 180, 227, 171, 247, 218, 226 }, + { 72, 75, 162, 226, 166, 181, 231, 172, 242, 200, 219 }, + { 30, 63, 130, 218, 153, 175, 226, 170, 247, 216, 219 }, + { 5, 39, 67, 156, 119, 151, 194, 140, 239, 202, 216 } + }, { /* Coeff Band 2 */ + { 1, 79, 182, 228, 175, 183, 224, 170, 247, 215, 220 }, + { 69, 77, 168, 224, 170, 180, 223, 168, 246, 215, 223 }, + { 24, 63, 126, 209, 153, 171, 219, 160, 247, 215, 225 }, + { 3, 35, 58, 151, 115, 151, 191, 138, 240, 199, 220 } + }, { /* Coeff Band 3 */ + { 1, 139, 213, 238, 194, 192, 234, 180, 244, 193, 236 }, + { 82, 127, 204, 238, 190, 186, 234, 175, 244, 191, 235 }, + { 26, 93, 161, 230, 173, 179, 233, 178, 249, 217, 241 }, + { 3, 48, 78, 186, 132, 158, 212, 157, 244, 205, 233 } + }, { /* Coeff Band 4 */ + { 1, 100, 208, 233, 180, 182, 238, 175, 250, 206, 225 }, + { 84, 87, 184, 230, 175, 180, 236, 179, 250, 209, 243 }, + { 14, 61, 111, 217, 146, 171, 236, 174, 249, 207, 245 }, + { 1, 32, 49, 150, 106, 142, 212, 145, 242, 191, 237 } + }, { /* Coeff Band 5 */ + { 1, 130, 223, 241, 192, 189, 231, 176, 250, 209, 246 }, + { 101, 120, 207, 239, 188, 187, 240, 196, 250, 202, 255 }, + { 19, 90, 155, 232, 169, 181, 238, 190, 250, 207, 249 }, + { 1, 54, 86, 197, 130, 161, 220, 170, 248, 196, 248 } + }, { /* Coeff Band 6 */ + { 1, 103, 208, 236, 183, 185, 235, 190, 243, 202, 219 }, + { 95, 92, 185, 230, 175, 181, 233, 174, 242, 203, 225 }, + { 24, 72, 131, 213, 152, 171, 226, 164, 241, 202, 220 }, + { 3, 45, 74, 169, 123, 154, 204, 145, 238, 188, 222 } + }, { /* Coeff Band 7 */ + { 1, 63, 236, 247, 205, 194, 241, 189, 252, 222, 255 }, + { 151, 48, 224, 245, 200, 193, 240, 187, 255, 234, 255 }, + { 76, 45, 178, 240, 180, 189, 239, 182, 253, 231, 255 }, + { 38, 31, 111, 187, 125, 154, 217, 155, 253, 214, 255 } } } }; - static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES_8X8] = { - { - /* block Type 0 */ - { - 
/* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 1 */ - { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, - { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} - }, - { - /* Coeff Band 2 */ - { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, - { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} - }, - { - /* Coeff Band 3 */ - { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, - { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} - }, - { - /* Coeff Band 4 */ - { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, - { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} - }, - { - /* Coeff Band 5 */ - { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, - { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { - /* Coeff Band 6 */ - { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, - { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { - /* Coeff Band 7 */ - { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, - { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} - } - }, - { - /* block Type 1 */ - { - /* Coeff Band 0 */ - { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128}, - { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128}, - { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128} - }, - { - /* Coeff Band 1 */ - { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128}, - { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128}, - { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}, - { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128} - }, - { - /* Coeff Band 2 */ - { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128}, - { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128}, - { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}, - { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 3 */ - { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128}, - { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128}, - { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}, - { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - } - }, - { - /* block Type 2 */ - { - /* Coeff Band 0 */ - { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128}, - { 2, 147, 185, 248, 163, 180, 255, 236, 128, 128, 128}, - { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}, - { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128} - }, - { - /* Coeff Band 1 */ - { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128}, - { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128}, - { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}, - { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128} - }, - { - /* Coeff Band 2 */ - { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128}, - { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128}, - { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}, - { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128} - }, - { - /* Coeff Band 3 */ - { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128}, - { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128}, - { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}, - { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128} - }, - { - /* Coeff Band 4 */ - { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128}, - { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128}, - { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}, - { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128} - }, - { - /* Coeff Band 5 */ - { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128}, - { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128}, - { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}, - { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128} - }, - { - /* Coeff Band 6 */ - { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128}, - { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128}, - { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}, - { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128} - }, - { - /* Coeff Band 7 */ - { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128}, - { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128}, - { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, - { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} - } - }, - { /* block Type 3 */ + { /* block Type 0 */ { /* Coeff Band 0 */ - { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, - { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255}, - { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128}, - { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128}, - { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128}, - { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128} - }, - { /* Coeff Band 2 */ - { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128}, - { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128}, - { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128}, - { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128} - }, 
- { /* Coeff Band 3 */ - { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128}, - { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128}, - { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128}, - { 1, 44, 81, 192, 130, 148, 240, 180, 255, 255, 128} - }, - { /* Coeff Band 4 */ - { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128}, - { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128}, - { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128}, - { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128} - }, - { /* Coeff Band 5 */ - { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128}, - { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128}, - { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128}, - { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128} - }, - { /* Coeff Band 6 */ - { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128}, - { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128}, - { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128}, - { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128}, - { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128}, - { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 179, 203, 246, 252, 217, 208, 249, 197, 238, 237, 255 }, + { 136, 193, 232, 247, 202, 199, 245, 194, 255, 235, 255 }, + { 66, 170, 209, 244, 190, 191, 250, 199, 255, 242, 192 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 1, 191, 232, 250, 204, 201, 248, 199, 254, 243, 213 }, + { 50, 161, 209, 247, 196, 197, 250, 206, 253, 240, 213 }, + { 6, 118, 160, 239, 173, 186, 249, 203, 254, 235, 255 }, + { 2, 90, 110, 211, 141, 166, 242, 181, 254, 235, 255 } + }, { /* Coeff Band 3 */ + { 1, 209, 242, 254, 223, 215, 253, 218, 255, 253, 128 }, + { 58, 168, 227, 253, 216, 211, 254, 226, 255, 251, 128 }, + { 7, 111, 178, 249, 195, 202, 253, 222, 254, 240, 255 }, + { 2, 63, 103, 226, 142, 175, 250, 202, 255, 246, 128 } + }, { /* Coeff Band 4 */ + { 1, 207, 241, 252, 213, 205, 252, 215, 255, 228, 255 }, + { 55, 171, 225, 251, 209, 205, 251, 212, 254, 234, 255 }, + { 5, 108, 173, 247, 187, 195, 251, 211, 255, 231, 128 }, + { 2, 56, 97, 220, 138, 169, 248, 191, 253, 237, 255 } + }, { /* Coeff Band 5 */ + { 1, 211, 245, 255, 227, 219, 255, 233, 255, 255, 128 }, + { 58, 175, 228, 254, 217, 215, 255, 231, 255, 255, 128 }, + { 6, 124, 181, 249, 191, 199, 255, 222, 255, 251, 128 }, + { 2, 85, 122, 227, 149, 172, 250, 195, 255, 245, 128 } + }, { /* Coeff Band 6 */ + { 1, 216, 246, 255, 231, 217, 254, 220, 255, 250, 128 }, + { 74, 177, 236, 254, 222, 214, 254, 221, 255, 255, 128 }, + { 13, 125, 192, 250, 200, 203, 254, 217, 255, 245, 128 }, + { 2, 70, 114, 227, 147, 175, 251, 198, 255, 240, 128 } + }, { /* Coeff Band 7 */ + { 1, 199, 246, 255, 238, 229, 255, 226, 255, 255, 128 }, + { 132, 162, 240, 255, 229, 222, 255, 239, 255, 255, 128 }, + { 79, 125, 207, 253, 213, 214, 255, 232, 255, 255, 128 }, + { 41, 89, 149, 240, 161, 187, 250, 216, 255, 255, 128 } + } + }, { /* block Type 1 */ + { /* Coeff Band 0 */ + { 138, 65, 189, 212, 172, 169, 200, 153, 233, 182, 214 }, + { 93, 60, 162, 203, 160, 169, 200, 153, 239, 190, 213 }, + { 66, 55, 
141, 195, 152, 166, 199, 152, 238, 190, 212 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 102, 221, 247, 205, 198, 248, 201, 255, 235, 128 }, + { 122, 95, 215, 247, 200, 197, 248, 200, 254, 227, 255 }, + { 60, 81, 166, 241, 177, 190, 245, 193, 255, 246, 255 }, + { 32, 61, 108, 195, 133, 159, 230, 163, 254, 230, 238 } + }, { /* Coeff Band 2 */ + { 1, 58, 203, 242, 194, 193, 229, 177, 253, 225, 249 }, + { 113, 62, 192, 237, 184, 187, 231, 181, 253, 220, 249 }, + { 50, 50, 135, 225, 159, 177, 229, 172, 254, 222, 241 }, + { 24, 34, 82, 185, 125, 152, 223, 158, 253, 212, 219 } + }, { /* Coeff Band 3 */ + { 1, 1, 220, 253, 218, 209, 251, 213, 255, 255, 128 }, + { 154, 1, 216, 252, 211, 206, 252, 212, 255, 252, 128 }, + { 102, 1, 157, 249, 184, 200, 253, 214, 255, 247, 128 }, + { 68, 1, 101, 213, 129, 161, 247, 186, 255, 237, 255 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 2 */ + { /* Coeff Band 0 */ + { 229, 64, 235, 236, 189, 190, 227, 179, 247, 203, 226 }, + { 148, 70, 194, 228, 175, 182, 216, 170, 238, 192, 224 }, + { 53, 63, 134, 207, 150, 169, 213, 161, 247, 204, 232 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 173, 234, 244, 201, 193, 239, 180, 252, 214, 255 }, + { 160, 156, 222, 243, 200, 193, 237, 179, 253, 216, 255 }, + { 55, 119, 187, 240, 189, 192, 236, 180, 253, 226, 255 }, + { 14, 65, 105, 193, 142, 165, 205, 151, 249, 200, 250 } + }, { /* Coeff Band 2 */ + { 1, 124, 218, 246, 195, 196, 242, 198, 254, 229, 255 }, + { 85, 114, 180, 240, 179, 187, 239, 191, 253, 223, 239 }, + { 18, 81, 128, 220, 152, 173, 232, 176, 252, 221, 254 }, + { 2, 42, 64, 150, 115, 149, 192, 137, 247, 197, 247 } + }, { /* Coeff Band 3 */ + { 1, 164, 230, 251, 210, 204, 245, 201, 255, 238, 255 }, + { 96, 137, 210, 248, 199, 199, 244, 198, 254, 218, 255 }, + { 20, 97, 169, 240, 179, 188, 242, 190, 254, 228, 255 }, + { 2, 58, 95, 197, 137, 164, 220, 158, 252, 217, 248 } + }, { /* Coeff Band 4 */ + { 1, 193, 236, 245, 203, 194, 243, 191, 254, 223, 255 }, + { 86, 163, 217, 241, 190, 188, 242, 189, 253, 220, 255 }, + { 14, 108, 161, 228, 167, 178, 238, 180, 253, 224, 255 }, + { 1, 51, 84, 186, 127, 159, 216, 155, 251, 208, 243 } + }, { /* Coeff Band 5 */ + { 1, 183, 235, 248, 209, 197, 244, 195, 253, 236, 239 }, + { 79, 144, 208, 243, 193, 190, 244, 191, 254, 231, 255 }, + { 13, 100, 151, 227, 163, 176, 240, 180, 255, 233, 244 }, + { 1, 48, 77, 171, 121, 153, 214, 150, 252, 214, 
245 } + }, { /* Coeff Band 6 */ + { 1, 202, 234, 252, 215, 207, 248, 207, 254, 242, 255 }, + { 75, 153, 216, 249, 203, 201, 248, 203, 255, 239, 255 }, + { 11, 104, 168, 241, 179, 189, 245, 194, 255, 237, 128 }, + { 1, 57, 95, 201, 134, 163, 229, 165, 254, 223, 246 } + }, { /* Coeff Band 7 */ + { 1, 184, 236, 254, 222, 212, 254, 225, 255, 255, 128 }, + { 74, 149, 220, 252, 210, 208, 253, 223, 255, 249, 128 }, + { 18, 109, 175, 247, 184, 195, 253, 211, 255, 250, 128 }, + { 3, 64, 113, 219, 144, 171, 246, 187, 255, 250, 128 } + } + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 140, 101, 214, 227, 176, 182, 218, 167, 233, 205, 164 }, + { 96, 101, 176, 204, 161, 173, 193, 152, 223, 182, 182 }, + { 27, 84, 123, 176, 140, 162, 190, 142, 238, 189, 210 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 178, 218, 240, 189, 189, 238, 184, 250, 232, 189 }, + { 69, 146, 204, 239, 187, 189, 238, 183, 251, 226, 221 }, + { 16, 98, 157, 234, 170, 185, 237, 183, 252, 220, 218 }, + { 3, 49, 78, 172, 122, 154, 204, 150, 242, 198, 207 } + }, { /* Coeff Band 2 */ + { 1, 165, 207, 230, 179, 181, 234, 172, 252, 228, 218 }, + { 25, 130, 175, 224, 169, 177, 232, 169, 252, 230, 207 }, + { 4, 81, 118, 205, 144, 167, 227, 162, 252, 225, 219 }, + { 2, 51, 63, 150, 114, 148, 197, 138, 244, 202, 204 } + }, { /* Coeff Band 3 */ + { 1, 181, 222, 247, 200, 197, 246, 199, 252, 232, 228 }, + { 25, 142, 200, 244, 190, 193, 245, 195, 253, 233, 204 }, + { 3, 90, 146, 233, 166, 181, 242, 188, 252, 229, 216 }, + { 1, 47, 79, 188, 124, 157, 222, 162, 245, 213, 203 } + }, { /* Coeff Band 4 */ + { 1, 179, 220, 242, 195, 191, 237, 182, 251, 217, 231 }, + { 27, 144, 200, 241, 188, 190, 238, 185, 250, 224, 235 }, + { 3, 93, 149, 230, 166, 180, 235, 180, 249, 222, 221 }, + { 1, 47, 79, 181, 125, 157, 211, 154, 241, 205, 198 } + }, { /* Coeff Band 5 */ + { 1, 176, 222, 247, 202, 198, 247, 199, 252, 234, 219 }, + { 24, 139, 197, 244, 190, 192, 246, 196, 253, 232, 220 }, + { 2, 89, 140, 229, 161, 178, 243, 185, 253, 233, 234 }, + { 1, 49, 76, 176, 121, 154, 214, 153, 243, 209, 208 } + }, { /* Coeff Band 6 */ + { 1, 197, 233, 251, 213, 205, 247, 206, 249, 222, 247 }, + { 35, 159, 216, 249, 203, 201, 246, 203, 250, 222, 223 }, + { 4, 108, 167, 240, 178, 188, 244, 195, 248, 220, 235 }, + { 1, 58, 93, 198, 133, 161, 220, 167, 233, 195, 221 } + }, { /* Coeff Band 7 */ + { 1, 188, 240, 253, 221, 209, 248, 207, 252, 223, 255 }, + { 84, 153, 227, 251, 212, 205, 247, 205, 254, 215, 255 }, + { 25, 117, 182, 244, 186, 192, 243, 198, 250, 209, 255 }, + { 7, 72, 108, 197, 138, 162, 203, 161, 240, 178, 247 } } } }; - static const vp9_coeff_probs default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] = { - { - /* block Type 0 */ - { - /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 1 */ - { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, - { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} - }, - { - /* Coeff Band 2 */ - { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, - { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} - }, - { - 
/* Coeff Band 3 */ - { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, - { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} - }, - { - /* Coeff Band 4 */ - { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, - { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} - }, - { - /* Coeff Band 5 */ - { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, - { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { - /* Coeff Band 6 */ - { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, - { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { - /* Coeff Band 7 */ - { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, - { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} - } - }, - { - /* block Type 1 */ - { - /* Coeff Band 0 */ - { 134, 152, 233, 224, 234, 52, 255, 166, 128, 128, 128}, - { 97, 132, 185, 234, 186, 189, 197, 171, 255, 212, 128}, - { 84, 110, 185, 237, 182, 182, 145, 145, 255, 255, 128} - }, - { - /* Coeff Band 1 */ - { 1, 124, 213, 247, 192, 212, 255, 255, 128, 128, 128}, - { 88, 111, 178, 254, 189, 211, 255, 255, 128, 128, 128}, - { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128}, - { 12, 59, 129, 236, 150, 179, 239, 195, 255, 255, 128} - }, - { - /* Coeff Band 2 */ - { 1, 102, 225, 255, 210, 240, 128, 128, 128, 128, 128}, - { 110, 78, 195, 254, 200, 191, 255, 255, 128, 128, 128}, - { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128}, - { 37, 63, 177, 255, 194, 195, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 3 */ - { 1, 1, 229, 255, 202, 224, 128, 128, 128, 128, 128}, - { 150, 1, 192, 255, 206, 226, 128, 128, 128, 128, 128}, - { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128}, - { 75, 1, 138, 255, 172, 228, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 4 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 5 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 6 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { - /* Coeff Band 7 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - } - }, - { - /* block Type 2 */ - { - /* Coeff Band 0 */ - { 11, 181, 226, 199, 183, 255, 255, 255, 128, 128, 128}, - { 2, 147, 185, 248, 163, 180, 255, 236, 128, 
128, 128}, - { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128}, - { 1, 123, 157, 238, 154, 176, 255, 226, 255, 255, 128} - }, - { - /* Coeff Band 1 */ - { 1, 150, 191, 246, 174, 188, 255, 235, 128, 128, 128}, - { 1, 125, 166, 245, 165, 185, 255, 234, 128, 128, 128}, - { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128}, - { 1, 79, 125, 240, 148, 179, 255, 234, 255, 255, 128} - }, - { - /* Coeff Band 2 */ - { 1, 146, 184, 242, 167, 183, 255, 230, 255, 255, 128}, - { 1, 119, 160, 239, 156, 178, 255, 231, 255, 255, 128}, - { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128}, - { 1, 75, 115, 234, 142, 173, 255, 225, 255, 255, 128} - }, - { - /* Coeff Band 3 */ - { 1, 150, 188, 244, 169, 183, 255, 233, 255, 255, 128}, - { 1, 123, 162, 243, 161, 180, 255, 233, 128, 128, 128}, - { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128}, - { 1, 76, 120, 238, 148, 178, 255, 230, 255, 255, 128} - }, - { - /* Coeff Band 4 */ - { 1, 163, 202, 252, 188, 204, 255, 248, 128, 128, 128}, - { 1, 136, 180, 251, 181, 201, 255, 246, 128, 128, 128}, - { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128}, - { 1, 92, 146, 249, 170, 197, 255, 245, 128, 128, 128} - }, - { - /* Coeff Band 5 */ - { 1, 156, 195, 249, 179, 193, 255, 241, 255, 255, 128}, - { 1, 128, 169, 248, 171, 192, 255, 242, 255, 255, 128}, - { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128}, - { 1, 84, 132, 245, 158, 187, 255, 240, 255, 255, 128} - }, - { - /* Coeff Band 6 */ - { 1, 36, 71, 251, 192, 201, 255, 243, 255, 255, 128}, - { 1, 49, 185, 250, 184, 199, 255, 242, 128, 128, 128}, - { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128}, - { 1, 95, 147, 247, 168, 190, 255, 239, 255, 255, 128} - }, - { - /* Coeff Band 7 */ - { 1, 19, 98, 255, 218, 222, 255, 255, 128, 128, 128}, - { 36, 50, 210, 255, 212, 221, 255, 255, 128, 128, 128}, - { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}, - { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} - } - }, - { /* block Type 3 */ + { /* block Type 0 */ { /* Coeff Band 0 */ - { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, - { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255}, - { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128}, - { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128}, - { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128}, - { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128} - }, - { /* Coeff Band 2 */ - { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128}, - { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128}, - { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128}, - { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128} - }, - { /* Coeff Band 3 */ - { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128}, - { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128}, - { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128}, - { 1, 44, 81, 192, 130, 148, 240, 180, 255, 255, 128} - }, - { /* Coeff Band 4 */ - { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128}, - { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128}, - { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128}, - { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128} - }, - { /* Coeff Band 5 */ - { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128}, - { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128}, - { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128}, - { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128} - }, - { /* Coeff Band 6 
*/ - { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128}, - { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128}, - { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128}, - { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128}, - { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128}, - { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 1 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 2 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 118, 27, 105, 170, 137, 166, 183, 137, 243, 189, 241 }, + { 44, 34, 
85, 142, 127, 158, 161, 128, 232, 174, 213 }, + { 8, 26, 47, 104, 108, 145, 143, 117, 226, 168, 207 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 134, 172, 217, 163, 175, 226, 167, 251, 220, 204 }, + { 56, 129, 168, 217, 161, 174, 223, 164, 249, 218, 223 }, + { 20, 110, 151, 215, 158, 174, 221, 165, 249, 209, 221 }, + { 2, 59, 88, 169, 128, 157, 192, 143, 239, 189, 214 } + }, { /* Coeff Band 2 */ + { 1, 65, 126, 191, 140, 163, 218, 153, 252, 218, 229 }, + { 21, 57, 92, 175, 126, 156, 214, 148, 252, 218, 229 }, + { 4, 44, 66, 148, 114, 148, 200, 136, 251, 211, 228 }, + { 1, 28, 42, 108, 104, 141, 158, 119, 235, 180, 210 } + }, { /* Coeff Band 3 */ + { 1, 114, 172, 227, 166, 177, 236, 178, 252, 226, 233 }, + { 41, 94, 152, 218, 156, 172, 233, 172, 251, 223, 231 }, + { 9, 69, 116, 202, 142, 165, 226, 162, 251, 221, 227 }, + { 1, 36, 60, 151, 113, 148, 195, 140, 241, 198, 211 } + }, { /* Coeff Band 4 */ + { 1, 186, 200, 227, 174, 178, 230, 169, 248, 210, 238 }, + { 27, 148, 181, 221, 167, 176, 226, 166, 250, 218, 228 }, + { 3, 96, 139, 208, 154, 170, 219, 161, 249, 214, 229 }, + { 1, 44, 70, 156, 120, 152, 188, 139, 239, 193, 200 } + }, { /* Coeff Band 5 */ + { 1, 169, 203, 238, 186, 186, 238, 184, 252, 224, 230 }, + { 32, 119, 173, 232, 172, 181, 236, 182, 252, 222, 237 }, + { 6, 84, 128, 215, 150, 170, 232, 172, 251, 221, 235 }, + { 1, 49, 78, 167, 124, 154, 200, 145, 243, 198, 217 } + }, { /* Coeff Band 6 */ + { 1, 193, 215, 244, 197, 195, 239, 192, 249, 213, 240 }, + { 52, 136, 193, 239, 184, 189, 237, 189, 248, 211, 226 }, + { 13, 90, 146, 227, 162, 178, 233, 182, 248, 211, 231 }, + { 1, 49, 79, 177, 124, 156, 201, 154, 234, 188, 212 } + }, { /* Coeff Band 7 */ + { 1, 189, 238, 248, 219, 196, 232, 180, 253, 211, 255 }, + { 104, 148, 224, 245, 211, 194, 225, 171, 251, 206, 255 }, + { 43, 116, 190, 231, 179, 183, 217, 168, 249, 199, 255 }, + { 13, 65, 92, 154, 131, 152, 167, 132, 238, 174, 243 } } } }; - static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES_16X16] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, - { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} - }, - { /* Coeff Band 2 */ - { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, - { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 3 */ - { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, - { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 4 */ - { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, - { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} - }, - { /* Coeff Band 5 */ - { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, - { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, - { 1, 110, 171, 252, 191, 
204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { /* Coeff Band 6 */ - { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, - { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, - { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } - }, - { /* block Type 1 */ - { /* Coeff Band 0 */ - { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, - { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, - { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, - { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, - { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, - { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} - }, - { /* Coeff Band 2 */ - { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, - { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, - { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, - { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} - }, - { /* Coeff Band 
3 */ - { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, - { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, - { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, - { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} - }, - { /* Coeff Band 4 */ - { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, - { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, - { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, - { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 5 */ - { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, - { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, - { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, - { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} - }, - { /* Coeff Band 6 */ - { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, - { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, - { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, - { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, - { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, - { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, - { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} - } - }, - { /* block Type 2 */ - { /* Coeff Band 0 */ - { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, - { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, - { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, - { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, - { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, - { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} - }, - { /* Coeff Band 2 */ - { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, - { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, - { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, - { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} - }, - { /* Coeff Band 3 */ - { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, - { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, - { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, - { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} - }, - { /* Coeff Band 4 */ - { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, - { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, - { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, - { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 5 */ - { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, - { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, - { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, - { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} - }, - { /* Coeff Band 6 */ - { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, - { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, - { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, - { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, - { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, - { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, - { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} - } - }, - { /* block Type 3 */ + }, { /* block Type 1 */ { /* Coeff Band 0 */ - { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, - { 6, 92, 166, 193, 158, 169, 179, 142, 
236, 175, 200}, - { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128}, - { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128}, - { 45, 123, 175, 240, 184, 195, 239, 178, 255, 218, 255}, - { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255} - }, - { /* Coeff Band 2 */ - { 1, 141, 183, 240, 176, 187, 246, 198, 255, 218, 128}, - { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255}, - { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255}, - { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255} - }, - { /* Coeff Band 3 */ - { 1, 173, 196, 245, 184, 191, 252, 211, 255, 240, 128}, - { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128}, - { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128}, - { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255} - }, - { /* Coeff Band 4 */ - { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128}, - { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128}, - { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128}, - { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128} - }, - { /* Coeff Band 5 */ - { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128}, - { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128}, - { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128}, - { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128} - }, - { /* Coeff Band 6 */ - { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128}, - { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128}, - { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128}, - { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255} - }, - { /* Coeff Band 7 */ - { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128}, - { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128}, - { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128}, - { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128} + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + 
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 2 */ + { /* Coeff Band 0 */ + { 223, 34, 236, 234, 193, 185, 216, 169, 239, 189, 229 }, + { 125, 40, 195, 221, 173, 175, 209, 165, 220, 181, 196 }, + { 41, 37, 127, 185, 145, 162, 191, 150, 227, 180, 219 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 160, 224, 239, 193, 190, 213, 178, 244, 174, 255 }, + { 199, 154, 212, 238, 190, 190, 210, 173, 246, 183, 249 }, + { 88, 122, 178, 234, 180, 187, 213, 174, 244, 182, 247 }, + { 27, 69, 100, 174, 139, 165, 159, 142, 225, 157, 240 } + }, { /* Coeff Band 2 */ + { 1, 118, 207, 237, 179, 185, 234, 189, 241, 194, 237 }, + { 86, 103, 161, 227, 163, 176, 231, 183, 241, 196, 234 }, + { 19, 69, 113, 205, 140, 166, 220, 169, 240, 188, 242 }, + { 3, 32, 49, 106, 111, 144, 132, 121, 225, 151, 237 } + }, { /* Coeff Band 3 */ + { 1, 160, 218, 245, 197, 195, 235, 189, 254, 218, 255 }, + { 90, 127, 193, 240, 186, 189, 235, 187, 251, 217, 230 }, + { 18, 92, 148, 229, 164, 179, 228, 180, 254, 212, 229 }, + { 2, 50, 79, 163, 126, 156, 186, 140, 247, 191, 236 } + }, { /* Coeff Band 4 */ + { 1, 196, 231, 240, 203, 191, 225, 171, 253, 214, 255 }, + { 71, 167, 210, 234, 194, 188, 218, 165, 253, 215, 236 }, + { 11, 119, 165, 217, 171, 177, 213, 155, 252, 209, 255 }, + { 1, 46, 70, 145, 121, 153, 180, 131, 249, 192, 246 } + }, { /* Coeff Band 5 */ + { 1, 176, 223, 242, 202, 194, 222, 169, 253, 211, 244 }, + { 62, 131, 191, 233, 185, 186, 219, 164, 251, 211, 252 }, + { 7, 89, 133, 207, 156, 173, 211, 157, 251, 206, 247 }, + { 1, 36, 56, 127, 113, 147, 166, 125, 243, 183, 242 } + }, { /* Coeff Band 6 */ + { 1, 203, 232, 249, 213, 202, 245, 193, 254, 237, 255 }, + { 51, 155, 212, 245, 199, 195, 244, 192, 254, 234, 255 }, + { 7, 101, 158, 233, 170, 181, 244, 185, 253, 242, 255 }, + { 1, 49, 82, 185, 123, 157, 226, 156, 252, 225, 240 } + }, { /* Coeff Band 7 */ + { 1, 222, 233, 252, 220, 207, 247, 206, 255, 240, 128 }, + { 40, 159, 216, 250, 205, 201, 248, 207, 249, 219, 255 }, + { 6, 106, 163, 240, 176, 188, 247, 198, 251, 222, 255 }, + { 1, 51, 88, 196, 127, 159, 232, 169, 252, 214, 255 } + } + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 14, 78, 225, 217, 173, 181, 198, 153, 228, 185, 176 }, + { 9, 74, 179, 191, 157, 171, 178, 143, 229, 175, 209 }, + { 3, 48, 92, 128, 130, 155, 135, 123, 220, 155, 219 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 178, 209, 214, 173, 175, 208, 152, 252, 210, 237 }, + { 142, 151, 193, 212, 170, 175, 209, 151, 251, 208, 237 }, + { 38, 105, 150, 206, 159, 173, 208, 151, 250, 209, 238 }, + { 5, 44, 61, 128, 114, 147, 167, 125, 239, 184, 217 } + }, { /* Coeff Band 2 */ + { 1, 154, 195, 202, 166, 173, 184, 144, 245, 184, 236 }, + { 49, 110, 150, 188, 155, 168, 180, 141, 244, 183, 239 }, + { 4, 63, 90, 158, 132, 157, 171, 134, 243, 179, 239 }, + { 1, 25, 37, 93, 104, 141, 133, 114, 231, 161, 226 } + }, { /* Coeff Band 3 */ + { 1, 184, 201, 223, 173, 177, 224, 164, 253, 220, 238 }, + { 42, 127, 170, 215, 164, 173, 223, 
162, 253, 219, 233 }, + { 4, 75, 114, 195, 142, 164, 218, 155, 253, 217, 235 }, + { 1, 32, 50, 128, 108, 144, 180, 127, 247, 197, 219 } + }, { /* Coeff Band 4 */ + { 1, 190, 207, 232, 181, 184, 228, 172, 251, 216, 212 }, + { 35, 136, 180, 227, 173, 180, 227, 171, 251, 216, 218 }, + { 2, 85, 131, 214, 154, 173, 224, 166, 250, 214, 225 }, + { 1, 44, 71, 162, 120, 153, 195, 143, 240, 195, 197 } + }, { /* Coeff Band 5 */ + { 1, 185, 201, 230, 177, 180, 232, 172, 253, 225, 235 }, + { 27, 122, 165, 221, 164, 175, 230, 169, 253, 224, 220 }, + { 1, 72, 108, 197, 139, 163, 224, 159, 253, 224, 226 }, + { 1, 33, 51, 132, 107, 144, 186, 130, 245, 201, 206 } + }, { /* Coeff Band 6 */ + { 1, 203, 214, 240, 193, 191, 235, 178, 252, 225, 224 }, + { 20, 140, 188, 235, 182, 186, 234, 177, 252, 226, 226 }, + { 1, 85, 132, 218, 155, 174, 230, 170, 251, 224, 227 }, + { 1, 39, 62, 154, 114, 150, 199, 141, 241, 203, 214 } + }, { /* Coeff Band 7 */ + { 1, 217, 224, 244, 202, 193, 241, 187, 252, 227, 239 }, + { 22, 151, 200, 239, 187, 188, 240, 184, 252, 226, 237 }, + { 2, 90, 138, 222, 158, 174, 237, 176, 252, 226, 239 }, + { 1, 41, 66, 163, 116, 151, 206, 146, 243, 201, 230 } } } }; - -static const vp9_coeff_probs - default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] = { +static const vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] = { { /* block Type 0 */ { /* Coeff Band 0 */ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 60, 140, 195, 255, 212, 214, 128, 128, 128, 128, 128}, - { 75, 221, 231, 255, 203, 255, 128, 128, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128}, - { 9, 212, 196, 251, 197, 207, 255, 185, 128, 128, 128} - }, - { /* Coeff Band 2 */ - { 1, 227, 226, 255, 215, 215, 128, 128, 128, 128, 128}, - { 5, 163, 209, 255, 212, 212, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128}, - { 1, 133, 203, 255, 210, 220, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 3 */ - { 1, 226, 225, 255, 228, 236, 128, 128, 128, 128, 128}, - { 6, 163, 208, 255, 224, 234, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128}, - { 1, 122, 196, 253, 212, 248, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 4 */ - { 1, 222, 197, 254, 193, 216, 255, 236, 128, 128, 128}, - { 7, 140, 163, 251, 195, 211, 255, 238, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128}, - { 1, 91, 152, 249, 181, 197, 255, 239, 128, 128, 128} - }, - { /* Coeff Band 5 */ - { 1, 226, 218, 255, 216, 241, 255, 255, 128, 128, 128}, - { 6, 154, 191, 255, 218, 240, 255, 255, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { /* Coeff Band 6 */ - { 1, 221, 217, 255, 208, 217, 255, 232, 128, 128, 128}, - { 11, 155, 189, 254, 203, 211, 255, 249, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128}, - { 1, 110, 171, 252, 191, 204, 255, 236, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 207, 235, 255, 232, 240, 128, 128, 128, 128, 128}, - { 58, 161, 216, 255, 229, 235, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128}, - { 8, 133, 204, 255, 219, 231, 255, 255, 128, 128, 128} + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 
128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } - }, - { /* block Type 1 */ - { /* Coeff Band 0 */ - { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, - { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, - { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, - { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, - { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, - { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} - }, - { /* Coeff Band 2 */ - { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, - { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, - { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, - { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} - }, - { /* Coeff Band 3 */ - { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, - { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, - { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, - { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} - }, - { /* Coeff Band 4 */ - { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, - { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, - { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, - { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 5 */ - { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, - { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, - { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, - { 1, 88, 
155, 246, 183, 193, 255, 205, 128, 128, 128} - }, - { /* Coeff Band 6 */ - { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, - { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, - { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, - { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, - { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, - { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, - { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} - } - }, - { /* block Type 2 */ - { /* Coeff Band 0 */ - { 1, 30, 103, 204, 142, 168, 235, 161, 255, 228, 128}, - { 1, 35, 90, 192, 130, 161, 227, 158, 255, 226, 255}, - { 1, 36, 78, 180, 122, 156, 221, 153, 255, 222, 255}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 163, 228, 253, 212, 194, 255, 205, 128, 128, 128}, - { 67, 160, 226, 253, 210, 202, 245, 172, 255, 255, 128}, - { 51, 147, 219, 251, 207, 207, 255, 217, 128, 128, 128}, - { 25, 107, 175, 245, 183, 190, 254, 209, 255, 255, 128} - }, - { /* Coeff Band 2 */ - { 1, 66, 170, 240, 177, 186, 252, 203, 255, 245, 128}, - { 23, 64, 145, 230, 161, 177, 252, 198, 255, 255, 128}, - { 6, 51, 99, 208, 135, 163, 249, 178, 255, 248, 128}, - { 1, 33, 59, 161, 104, 151, 238, 164, 255, 237, 128} - }, - { /* Coeff Band 3 */ - { 1, 76, 216, 250, 198, 199, 255, 226, 255, 255, 128}, - { 86, 83, 200, 247, 189, 193, 255, 224, 255, 255, 128}, - { 30, 75, 164, 242, 172, 184, 254, 218, 255, 255, 128}, - { 3, 54, 103, 227, 140, 172, 253, 201, 255, 255, 128} - }, - { /* Coeff Band 4 */ - { 1, 241, 247, 255, 233, 223, 255, 255, 128, 128, 128}, - { 78, 212, 242, 255, 226, 230, 255, 255, 128, 128, 128}, - { 10, 167, 224, 255, 217, 225, 255, 128, 128, 128, 128}, - { 1, 104, 176, 250, 166, 219, 255, 255, 128, 128, 128} - }, - { /* Coeff Band 5 */ - { 1, 194, 241, 254, 228, 214, 248, 237, 255, 255, 128}, - { 95, 133, 228, 254, 218, 215, 255, 229, 128, 128, 128}, - { 24, 119, 201, 252, 202, 205, 255, 229, 128, 128, 128}, - { 1, 88, 155, 246, 183, 193, 255, 205, 128, 128, 128} - }, - { /* Coeff Band 6 */ - { 1, 204, 236, 255, 222, 220, 255, 239, 128, 128, 128}, - { 126, 105, 225, 254, 214, 217, 255, 254, 128, 128, 128}, - { 44, 86, 190, 251, 197, 204, 255, 233, 128, 128, 128}, - { 6, 71, 130, 240, 164, 188, 255, 246, 128, 128, 128} - }, - { /* Coeff Band 7 */ - { 1, 195, 250, 255, 239, 197, 128, 128, 128, 128, 128}, - { 167, 102, 249, 255, 234, 255, 128, 128, 128, 128, 128}, - { 65, 91, 222, 255, 217, 255, 128, 128, 128, 128, 128}, - { 1, 59, 128, 255, 154, 255, 128, 128, 128, 128, 128} - } - }, - { /* block Type 3 */ + }, { /* block Type 1 */ { /* Coeff Band 0 */ - { 17, 105, 227, 195, 164, 170, 168, 137, 221, 160, 184}, - { 6, 92, 166, 193, 158, 169, 179, 142, 236, 175, 200}, - { 2, 68, 118, 193, 147, 168, 187, 149, 241, 178, 247}, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} - }, - { /* Coeff Band 1 */ - { 1, 193, 221, 246, 198, 194, 244, 176, 255, 192, 128}, - { 112, 160, 209, 244, 196, 194, 243, 175, 255, 209, 128}, - { 45, 123, 175, 240, 184, 195, 239, 178, 255, 218, 255}, - { 16, 53, 75, 169, 119, 152, 209, 146, 255, 219, 255} - }, - { /* Coeff Band 2 */ - { 1, 141, 183, 240, 176, 187, 246, 198, 255, 218, 128}, - { 36, 97, 150, 231, 161, 180, 243, 191, 255, 217, 255}, - { 8, 65, 111, 210, 143, 166, 230, 167, 255, 224, 255}, - { 2, 35, 61, 157, 113, 149, 208, 142, 255, 217, 255} - }, - { /* Coeff Band 3 */ - { 1, 173, 196, 245, 184, 191, 252, 
211, 255, 240, 128}, - { 35, 119, 175, 242, 177, 187, 252, 209, 255, 235, 128}, - { 4, 88, 141, 234, 161, 180, 249, 200, 255, 228, 128}, - { 1, 57, 95, 203, 133, 161, 235, 167, 255, 231, 255} - }, - { /* Coeff Band 4 */ - { 1, 208, 227, 249, 209, 204, 248, 188, 255, 248, 128}, - { 28, 162, 211, 247, 203, 200, 252, 188, 255, 232, 128}, - { 5, 114, 174, 238, 182, 189, 245, 184, 255, 238, 128}, - { 1, 61, 100, 205, 136, 164, 235, 163, 255, 239, 128} - }, - { /* Coeff Band 5 */ - { 1, 195, 218, 252, 208, 207, 250, 205, 255, 245, 128}, - { 22, 141, 196, 249, 198, 201, 250, 202, 255, 244, 128}, - { 2, 105, 163, 240, 178, 189, 246, 191, 255, 246, 128}, - { 1, 70, 112, 206, 144, 167, 232, 162, 255, 239, 128} - }, - { /* Coeff Band 6 */ - { 1, 204, 215, 251, 204, 203, 255, 222, 255, 225, 128}, - { 15, 140, 194, 249, 194, 199, 254, 221, 255, 253, 128}, - { 1, 95, 153, 243, 172, 188, 254, 213, 255, 248, 128}, - { 1, 59, 99, 216, 135, 166, 247, 190, 255, 237, 255} - }, - { /* Coeff Band 7 */ - { 1, 7, 231, 255, 227, 223, 255, 240, 255, 255, 128}, - { 15, 157, 217, 255, 218, 219, 255, 239, 255, 255, 128}, - { 1, 114, 182, 252, 198, 207, 255, 235, 255, 255, 128}, - { 1, 71, 122, 238, 154, 181, 255, 216, 255, 255, 128} + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 2 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 3, 29, 86, 140, 130, 163, 135, 131, 190, 148, 186 }, + { 1, 26, 61, 105, 124, 156, 105, 119, 178, 138, 173 }, + { 1, 15, 28, 60, 105, 142, 80, 105, 173, 128, 178 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 130, 142, 172, 141, 161, 191, 140, 244, 193, 216 }, + { 61, 124, 141, 173, 141, 161, 190, 139, 244, 194, 215 }, + { 28, 103, 124, 171, 138, 160, 190, 140, 243, 194, 225 }, + { 1, 36, 51, 111, 109, 144, 152, 120, 227, 173, 205 } + }, { /* Coeff Band 2 */ + { 1, 60, 125, 153, 143, 159, 156, 127, 234, 170, 233 }, + { 22, 48, 78, 129, 124, 152, 151, 123, 234, 170, 233 }, + { 3, 32, 46, 98, 107, 142, 138, 114, 232, 165, 232 }, + { 1, 15, 23, 61, 96, 135, 101, 103, 210, 144, 213 } + }, { /* Coeff Band 3 */ + { 1, 102, 144, 182, 146, 162, 194, 143, 246, 196, 239 }, + { 34, 76, 116, 171, 136, 159, 192, 140, 246, 195, 239 }, + { 4, 51, 81, 153, 124, 153, 184, 135, 246, 192, 239 }, + { 1, 23, 37, 98, 102, 140, 142, 116, 230, 167, 227 } + }, { /* Coeff Band 4 */ + { 1, 165, 171, 214, 163, 174, 214, 160, 245, 203, 219 }, + { 16, 120, 154, 210, 158, 172, 212, 159, 245, 201, 219 }, + { 1, 80, 122, 199, 147, 167, 208, 154, 244, 200, 223 }, + { 1, 40, 65, 145, 118, 151, 171, 135, 226, 175, 202 } + }, { /* Coeff Band 5 */ + { 1, 146, 162, 215, 159, 172, 226, 165, 251, 218, 231 }, + { 16, 92, 131, 205, 147, 167, 224, 162, 252, 217, 228 }, + { 2, 60, 92, 182, 129, 158, 216, 152, 251, 214, 234 }, + { 1, 32, 50, 126, 107, 144, 176, 128, 240, 189, 216 } + }, { /* 
Coeff Band 6 */ + { 1, 178, 186, 224, 172, 178, 224, 167, 251, 214, 232 }, + { 14, 118, 158, 215, 160, 173, 223, 164, 250, 214, 228 }, + { 2, 70, 109, 194, 139, 164, 217, 156, 250, 213, 227 }, + { 1, 32, 51, 129, 108, 146, 175, 128, 240, 187, 218 } + }, { /* Coeff Band 7 */ + { 1, 210, 214, 240, 192, 188, 235, 182, 251, 221, 228 }, + { 22, 140, 187, 233, 177, 183, 234, 178, 251, 219, 233 }, + { 3, 82, 130, 215, 152, 171, 229, 171, 250, 217, 232 }, + { 1, 38, 63, 154, 115, 149, 195, 141, 240, 196, 219 } } } }; - -#define default_coef_probs_32x32 default_coef_probs_16x16 +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 1 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 2 */ + { /* Coeff Band 0 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 2 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 3 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 4 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 5 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 6 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 7 */ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, { /* block Type 3 */ + { /* Coeff Band 0 */ + { 8, 40, 224, 217, 183, 181, 180, 148, 200, 180, 123 }, + { 6, 37, 178, 193, 173, 171, 160, 139, 205, 166, 173 }, + { 3, 27, 93, 133, 143, 159, 115, 125, 183, 141, 178 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, { /* Coeff Band 1 */ + { 1, 170, 209, 202, 172, 175, 179, 143, 238, 181, 214 }, + { 184, 164, 199, 199, 169, 173, 180, 143, 238, 184, 217 }, + { 99, 128, 165, 194, 161, 171, 180, 142, 239, 182, 219 }, + { 17, 49, 59, 102, 117, 148, 122, 116, 208, 152, 191 } + }, { /* Coeff Band 2 */ + { 1, 136, 200, 197, 172, 172, 168, 142, 226, 170, 216 }, + { 66, 104, 146, 175, 152, 165, 163, 139, 225, 170, 219 }, + { 11, 52, 83, 144, 130, 156, 151, 130, 222, 165, 216 }, + { 1, 16, 25, 65, 99, 137, 96, 106, 190, 138, 184 } + }, { /* Coeff Band 3 */ + { 1, 180, 203, 198, 166, 170, 190, 143, 241, 190, 227 }, + { 74, 125, 161, 187, 154, 165, 187, 142, 241, 189, 224 }, + { 15, 70, 98, 163, 133, 157, 182, 137, 241, 187, 226 }, + { 1, 25, 37, 89, 104, 140, 128, 113, 218, 158, 206 } + }, { /* Coeff Band 4 */ + { 1, 191, 208, 213, 169, 173, 212, 156, 246, 206, 217 }, + { 53, 136, 170, 205, 159, 170, 211, 156, 246, 205, 208 }, + { 3, 75, 112, 189, 140, 163, 209, 151, 246, 205, 215 }, + { 1, 32, 51, 127, 108, 145, 171, 128, 231, 183, 197 } + }, { /* Coeff Band 5 */ + { 1, 183, 195, 202, 161, 168, 206, 150, 247, 202, 229 }, + { 42, 113, 144, 190, 147, 163, 203, 148, 247, 202, 229 }, + { 2, 56, 82, 160, 124, 153, 195, 140, 246, 200, 229 }, + { 1, 22, 34, 93, 99, 138, 143, 115, 227, 170, 206 } + }, { /* Coeff Band 6 */ + { 1, 202, 193, 221, 168, 175, 227, 167, 251, 217, 236 }, + { 26, 122, 158, 213, 157, 171, 225, 165, 251, 216, 242 }, + { 1, 68, 105, 194, 136, 162, 221, 158, 251, 215, 239 }, + { 1, 32, 51, 131, 107, 145, 179, 130, 240, 188, 231 } + }, { /* Coeff Band 7 */ + { 1, 234, 212, 243, 195, 192, 240, 187, 253, 226, 227 }, + { 14, 141, 186, 237, 181, 186, 239, 184, 253, 226, 233 }, + { 1, 85, 132, 221, 155, 174, 235, 176, 253, 224, 226 }, + { 1, 39, 65, 159, 115, 150, 202, 144, 245, 202, 214 } + } + } +}; +#endif diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index bcc3b1f81..d5669c532 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -577,7 +577,7 @@ static void print_counter(FILE *f, vp9_coeff_accum *context_counters, fprintf(f, "static const vp9_coeff_count %s = {\n", header); -# define Comma( X) (X? ",":"") +#define Comma(X) (X ? "," : "") type = 0; do { fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); @@ -590,7 +590,7 @@ static void print_counter(FILE *f, vp9_coeff_accum *context_counters, t = 0; do { - const INT64 x = context_counters [type] [band] [pt] [t]; + const INT64 x = context_counters[type][band][pt][t]; const int y = (int) x; assert(x == (INT64) y); /* no overflow handling yet */ @@ -609,17 +609,20 @@ static void print_probs(FILE *f, vp9_coeff_accum *context_counters, int block_types, const char *header) { int type, band, pt, t; - fprintf(f, "static const vp9_coeff_probs %s = {\n", header); + fprintf(f, "static const vp9_coeff_probs %s = {", header); type = 0; +#define Newline(x, spaces) (x ? 
" " : "\n" spaces) do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); + fprintf(f, "%s%s{ /* block Type %d */", + Comma(type), Newline(type, " "), type); band = 0; do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); + fprintf(f, "%s%s{ /* Coeff Band %d */", + Comma(band), Newline(band, " "), band); pt = 0; do { - unsigned int branch_ct [ENTROPY_NODES] [2]; + unsigned int branch_ct[ENTROPY_NODES][2]; unsigned int coef_counts[MAX_ENTROPY_TOKENS]; vp9_prob coef_probs[ENTROPY_NODES]; @@ -632,10 +635,10 @@ static void print_probs(FILE *f, vp9_coeff_accum *context_counters, t = 0; do { - fprintf(f, "%s %d", Comma(t), coef_probs[t]); + fprintf(f, "%s %3d", Comma(t), coef_probs[t]); } while (++t < ENTROPY_NODES); - fprintf(f, "}"); + fprintf(f, " }"); } while (++pt < PREV_COEF_CONTEXTS); fprintf(f, "\n }"); } while (++band < COEF_BANDS); @@ -670,20 +673,20 @@ void print_context_counters() { /* print coefficient probabilities */ print_probs(f, context_counters_4x4, BLOCK_TYPES_4X4, - "vp9_default_coef_probs_4x4[BLOCK_TYPES_4X4]"); + "default_coef_probs_4x4[BLOCK_TYPES_4X4]"); print_probs(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4, - "vp9_default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]"); + "default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]"); print_probs(f, context_counters_8x8, BLOCK_TYPES_8X8, - "vp9_default_coef_probs_8x8[BLOCK_TYPES_8X8]"); + "default_coef_probs_8x8[BLOCK_TYPES_8X8]"); print_probs(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8, - "vp9_default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]"); + "default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]"); print_probs(f, context_counters_16x16, BLOCK_TYPES_16X16, - "vp9_default_coef_probs_16x16[BLOCK_TYPES_16X16]"); + "default_coef_probs_16x16[BLOCK_TYPES_16X16]"); print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, - "vp9_default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]"); + "default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]"); #if CONFIG_TX32X32 print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32, - "vp9_default_coef_probs_32x32[BLOCK_TYPES_32X32]"); + "default_coef_probs_32x32[BLOCK_TYPES_32X32]"); #endif fclose(f); From 210dc5b2db5de75b6cdd97e0bf79e7fb2bc11cba Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 11 Dec 2012 17:06:35 -0800 Subject: [PATCH 19/77] Further improvements on the hybrid dwt/dct expt Modifies the scanning pattern and uses a floating point 16x16 dct implementation for now to handle scaling better. Also experiments are in progress with 2/6 and 9/7 wavelets. Results have improved to within ~0.25% of 32x32 dct for std-hd and about 0.03% for derf. This difference can probably be bridged by re-optimizing the entropy stats for these transforms. Currently the stats used are common between 32x32 dct and dwt/dct. Experiments are in progress with various scan pattern - wavelet combinations. Ideally the subbands should be tokenized separately, and an experiment will be condcuted next on that. Change-Id: Ia9cbfc2d63cb7a47e562b2cd9341caf962bcc110 --- vp9/common/vp9_entropy.c | 296 ++++++++++++-- vp9/common/vp9_idctllm.c | 861 +++++++++++++++++++++++++++++---------- vp9/encoder/vp9_dct.c | 439 ++++++++++++++++++-- 3 files changed, 1323 insertions(+), 273 deletions(-) diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 9d8e924d5..b5da685ac 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -96,42 +96,271 @@ DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { // Table can be optimized. 
DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]) = { - 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, }; + DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { - 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4, 5, - 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22, 37, 52, - 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8, 9, 24, 39, - 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100, 85, 70, 55, 40, - 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131, 146, 161, 176, 192, 177, - 162, 147, 132, 117, 102, 87, 72, 57, 42, 27, 12, 13, 28, 43, 58, 73, - 88, 103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164, 149, 134, - 119, 104, 89, 74, 59, 44, 29, 14, 15, 30, 45, 60, 75, 90, 105, 120, - 135, 150, 165, 180, 195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, - 121, 106, 91, 76, 61, 46, 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, - 182, 197, 212, 227, 242, 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, - 78, 63, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, - 215, 200, 185, 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, - 216, 231, 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, - 203, 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235, - 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255, + 0, 1, 16, 32, 17, 2, 3, 18, + 33, 48, 64, 49, 34, 19, 4, 5, + 20, 35, 50, 65, 80, 96, 81, 66, + 51, 36, 21, 6, 7, 22, 37, 52, + 67, 82, 97, 112, 128, 113, 98, 83, + 68, 53, 38, 23, 8, 9, 24, 39, + 54, 69, 84, 99, 114, 129, 144, 160, + 145, 130, 115, 100, 85, 70, 55, 40, + 25, 10, 11, 26, 41, 56, 71, 86, + 101, 116, 131, 146, 161, 176, 192, 177, + 162, 147, 132, 117, 102, 87, 72, 57, + 42, 27, 12, 13, 28, 43, 58, 73, + 88, 103, 118, 133, 148, 163, 178, 193, + 208, 224, 209, 194, 179, 164, 149, 134, + 119, 104, 89, 74, 59, 44, 29, 14, + 15, 30, 45, 60, 75, 90, 105, 120, + 135, 150, 165, 180, 195, 210, 225, 
240, + 241, 226, 211, 196, 181, 166, 151, 136, + 121, 106, 91, 76, 61, 46, 31, 47, + 62, 77, 92, 107, 122, 137, 152, 167, + 182, 197, 212, 227, 242, 243, 228, 213, + 198, 183, 168, 153, 138, 123, 108, 93, + 78, 63, 79, 94, 109, 124, 139, 154, + 169, 184, 199, 214, 229, 244, 245, 230, + 215, 200, 185, 170, 155, 140, 125, 110, + 95, 111, 126, 141, 156, 171, 186, 201, + 216, 231, 246, 247, 232, 217, 202, 187, + 172, 157, 142, 127, 143, 158, 173, 188, + 203, 218, 233, 248, 249, 234, 219, 204, + 189, 174, 159, 175, 190, 205, 220, 235, + 250, 251, 236, 221, 206, 191, 207, 222, + 237, 252, 253, 238, 223, 239, 254, 255, }; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_DWT32X32HYBRID +DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, + 6, 6, 6, + 6, + 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 1, 32, 64, 33, 2, 3, 34, + 65, 96, 128, 97, 66, 35, 4, + 16, 512, 528, + 5, + 36, 67, 98, 129, 160, 192, 161, 130, + 99, 68, 37, 6, 7, 38, 69, 100, + 131, 162, 193, 224, 256, 225, 194, 163, + 132, 101, 70, 39, 8, 9, 40, 71, + 102, 133, 164, 195, 226, 257, 288, 320, + 289, 258, 227, 196, 165, 134, 103, 72, + 41, 10, 11, 42, 73, 104, 135, 166, + 197, 228, 259, 290, 321, 352, 384, 353, + 322, 291, 260, 229, 198, 167, 136, 105, + 74, 43, 12, 13, 44, 75, 106, 137, + 168, 199, 230, 261, 292, 323, 354, 385, + 416, 448, 417, 386, 355, 324, 293, 262, + 231, 200, 169, 138, 107, 76, 45, 14, + 15, 46, 77, 108, 139, 170, 201, 232, + 263, 294, 325, 356, 387, 418, 449, 480, + 481, 450, 419, 388, 357, 326, 295, 264, + 233, 202, 171, 140, 109, 78, 47, 79, + 110, 141, 172, 203, 234, 265, 296, 327, + 358, 389, 420, 451, 482, 483, 452, 421, + 390, 359, 328, 297, 266, 235, 204, 173, + 142, 111, 143, 174, 205, 236, 267, 298, + 329, 360, 391, 422, 453, 484, 485, 454, + 423, 392, 361, 330, 299, 268, 237, 206, + 175, 207, 238, 269, 300, 331, 362, 393, + 424, 455, 486, 487, 456, 425, 394, 363, + 332, 301, 270, 239, 271, 302, 333, 364, + 395, 426, 457, 488, 489, 458, 427, 396, + 365, 334, 303, 335, 366, 397, 428, 459, + 490, 491, 460, 429, 398, 367, 399, 430, + 461, 492, 493, 462, 431, 463, 494, 495, + + 17, 513, 529, 48, 544, + 560, 80, 576, 592, 49, 545, 561, 18, + 514, 530, 19, 515, 531, 50, 546, 562, + 81, 577, 593, 112, 608, 624, 144, 640, + 656, 113, 609, 625, 82, 578, 594, 51, + 547, 563, 20, 516, 532, 21, 517, 533, + 52, 548, 564, 83, 579, 595, 114, 610, + 626, 145, 641, 657, 176, 672, 688, 208, + 704, 720, 177, 673, 689, 146, 642, 658, + 115, 611, 627, 84, 580, 596, 53, 549, + 565, 22, 518, 534, 23, 519, 535, 54, + 550, 566, 85, 581, 597, 116, 612, 628, + 147, 643, 659, 178, 674, 690, 209, 705, + 721, 240, 736, 752, 272, 768, 784, 241, + 737, 753, 210, 706, 722, 179, 675, 691, + 148, 644, 660, 117, 613, 629, 86, 582, + 598, 55, 551, 567, 24, 520, 536, 25, + 521, 537, 56, 552, 568, 87, 583, 599, + 118, 614, 630, 149, 645, 661, 180, 676, + 692, 211, 707, 723, 242, 738, 754, 273, + 769, 785, 304, 800, 816, 336, 832, 848, + 305, 801, 817, 274, 770, 786, 243, 739, + 755, 212, 708, 724, 181, 677, 693, 150, + 646, 662, 119, 615, 631, 88, 584, 600, + 57, 553, 569, 26, 522, 538, 27, 523, + 539, 58, 554, 570, 89, 585, 601, 120, + 616, 632, 151, 647, 663, 182, 678, 694, + 213, 709, 725, 244, 740, 756, 275, 771, + 787, 306, 802, 818, 337, 833, 849, 368, + 864, 880, 400, 896, 912, 369, 865, 881, + 338, 834, 850, 307, 803, 819, 276, 772, + 788, 245, 741, 757, 214, 710, 726, 183, + + 679, 695, 152, 648, 664, 121, 617, 633, + 90, 586, 602, 59, 555, 571, 28, 524, + 540, 29, 525, 541, 60, 556, 572, 91, + 587, 603, 122, 618, 634, 153, 649, 665, + 184, 680, 696, 215, 711, 727, 246, 742, + 758, 277, 773, 789, 308, 804, 820, 339, + 835, 851, 370, 866, 882, 401, 897, 913, + 432, 928, 944, 464, 960, 976, 433, 929, + 945, 402, 898, 914, 371, 867, 883, 340, + 836, 852, 309, 805, 821, 278, 774, 790, + 247, 743, 759, 216, 712, 728, 185, 681, + 697, 
154, 650, 666, 123, 619, 635, 92, + 588, 604, 61, 557, 573, 30, 526, 542, + 31, 527, 543, 62, 558, 574, 93, 589, + 605, 124, 620, 636, 155, 651, 667, 186, + 682, 698, 217, 713, 729, 248, 744, 760, + 279, 775, 791, 310, 806, 822, 341, 837, + 853, 372, 868, 884, 403, 899, 915, 434, + 930, 946, 465, 961, 977, 496, 992, 1008, + 497, 993, 1009, 466, 962, 978, 435, 931, + 947, 404, 900, 916, 373, 869, 885, 342, + 838, 854, 311, 807, 823, 280, 776, 792, + 249, 745, 761, 218, 714, 730, 187, 683, + 699, 156, 652, 668, 125, 621, 637, 94, + 590, 606, 63, 559, 575, 95, 591, 607, + 126, 622, 638, 157, 653, 669, 188, 684, + 700, 219, 715, 731, 250, 746, 762, 281, + 777, 793, 312, 808, 824, 343, 839, 855, + 374, 870, 886, 405, 901, 917, 436, 932, + 948, 467, 963, 979, 498, 994, 1010, 499, + 995, 1011, 468, 964, 980, 437, 933, 949, + 406, 902, 918, 375, 871, 887, 344, 840, + + 856, 313, 809, 825, 282, 778, 794, 251, + 747, 763, 220, 716, 732, 189, 685, 701, + 158, 654, 670, 127, 623, 639, 159, 655, + 671, 190, 686, 702, 221, 717, 733, 252, + 748, 764, 283, 779, 795, 314, 810, 826, + 345, 841, 857, 376, 872, 888, 407, 903, + 919, 438, 934, 950, 469, 965, 981, 500, + 996, 1012, 501, 997, 1013, 470, 966, 982, + 439, 935, 951, 408, 904, 920, 377, 873, + 889, 346, 842, 858, 315, 811, 827, 284, + 780, 796, 253, 749, 765, 222, 718, 734, + 191, 687, 703, 223, 719, 735, 254, 750, + 766, 285, 781, 797, 316, 812, 828, 347, + 843, 859, 378, 874, 890, 409, 905, 921, + 440, 936, 952, 471, 967, 983, 502, 998, + 1014, 503, 999, 1015, 472, 968, 984, 441, + 937, 953, 410, 906, 922, 379, 875, 891, + 348, 844, 860, 317, 813, 829, 286, 782, + 798, 255, 751, 767, 287, 783, 799, 318, + 814, 830, 349, 845, 861, 380, 876, 892, + 411, 907, 923, 442, 938, 954, 473, 969, + 985, 504, 1000, 1016, 505, 1001, 1017, 474, + 970, 986, 443, 939, 955, 412, 908, 924, + 381, 877, 893, 350, 846, 862, 319, 815, + 831, 351, 847, 863, 382, 878, 894, 413, + 909, 925, 444, 940, 956, 475, 971, 987, + 506, 1002, 1018, 507, 1003, 1019, 476, 972, + 988, 445, 941, 957, 414, 910, 926, 383, + 879, 895, 415, 911, 927, 446, 942, 958, + 477, 973, 989, 508, 1004, 1020, 509, 1005, + 1021, 478, 974, 990, 447, 943, 959, 479, + 975, 991, 510, 1006, 1022, 511, 1007, 1023, +}; + +#else + DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -201,6 +430,7 @@ DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, }; + DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66, 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68, 37, 6, 7, 38, 69, 100, 131, 162, 193, 224, 256, 225, 194, 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, @@ -235,6 +465,8 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, }; +#endif // CONFIG_DWT32X32HYBRID +#endif /* Array indices are identical to previously-existing CONTEXT_NODE indices */ diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 
cc685b99e..b09f3bde6 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -888,8 +888,8 @@ static void idctcol(int *blk) { (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | (x7 = blk[8 * 3]))) { blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] - = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] - = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6); + = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] + = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6); return; } @@ -1127,25 +1127,25 @@ void vp9_short_idct16x16_c(short *input, short *output, int pitch) { #define TEST_INT_16x16_IDCT 1 #if !TEST_INT_16x16_IDCT -static const double C1 = 0.995184726672197; -static const double C2 = 0.98078528040323; -static const double C3 = 0.956940335732209; -static const double C4 = 0.923879532511287; -static const double C5 = 0.881921264348355; -static const double C6 = 0.831469612302545; -static const double C7 = 0.773010453362737; -static const double C8 = 0.707106781186548; -static const double C9 = 0.634393284163646; -static const double C10 = 0.555570233019602; -static const double C11 = 0.471396736825998; -static const double C12 = 0.38268343236509; -static const double C13 = 0.290284677254462; -static const double C14 = 0.195090322016128; -static const double C15 = 0.098017140329561; - static void butterfly_16x16_idct_1d(double input[16], double output[16]) { + static const double C1 = 0.995184726672197; + static const double C2 = 0.98078528040323; + static const double C3 = 0.956940335732209; + static const double C4 = 0.923879532511287; + static const double C5 = 0.881921264348355; + static const double C6 = 0.831469612302545; + static const double C7 = 0.773010453362737; + static const double C8 = 0.707106781186548; + static const double C9 = 0.634393284163646; + static const double C10 = 0.555570233019602; + static const double C11 = 0.471396736825998; + static const double C12 = 0.38268343236509; + static const double C13 = 0.290284677254462; + static const double C14 = 0.195090322016128; + static const double C15 = 0.098017140329561; + vp9_clear_system_state(); // Make it simd safe : __asm emms; { double step[16]; @@ -1389,6 +1389,12 @@ void vp9_short_idct16x16_c(short *input, short *output, int pitch) { } #else + +#define INITIAL_SHIFT 2 +#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1)) +#define RIGHT_SHIFT 14 +#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1)) + static const int16_t C1 = 16305; static const int16_t C2 = 16069; static const int16_t C3 = 15679; @@ -1405,212 +1411,207 @@ static const int16_t C13 = 4756; static const int16_t C14 = 3196; static const int16_t C15 = 1606; -#define INITIAL_SHIFT 2 -#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1)) -#define RIGHT_SHIFT 14 -#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1)) - static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16], int last_shift_bits) { - int16_t step[16]; - int intermediate[16]; - int temp1, temp2; + int16_t step[16]; + int intermediate[16]; + int temp1, temp2; - int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT; - int step1_rounding = 1 << (step1_shift - 1); - int last_rounding = 0; + int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT; + int step1_rounding = 1 << (step1_shift - 1); + int last_rounding = 0; - if (last_shift_bits > 0) - last_rounding = 1 << (last_shift_bits - 1); + if (last_shift_bits > 0) + last_rounding = 1 << (last_shift_bits - 1); - // step 1 and 2 - step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT; - step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT; + // step 1 
and 2 + step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT; + step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT; - temp1 = input[4] * C12; - temp2 = input[12] * C4; - temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 *= C8; - step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift; + temp1 = input[4] * C12; + temp2 = input[12] * C4; + temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift; - temp1 = input[4] * C4; - temp2 = input[12] * C12; - temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 *= C8; - step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift; + temp1 = input[4] * C4; + temp2 = input[12] * C12; + temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift; - temp1 = input[2] * C8; - temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp2 = input[6] + input[10]; - step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT; - step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + temp1 = input[2] * C8; + temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 = input[6] + input[10]; + step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT; - temp1 = input[14] * C8; - temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp2 = input[6] - input[10]; - step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT; - step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + temp1 = input[14] * C8; + temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 = input[6] - input[10]; + step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT; + step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT; - // for odd input - temp1 = input[3] * C12; - temp2 = input[13] * C4; - temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 *= C8; - intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + // for odd input + temp1 = input[3] * C12; + temp2 = input[13] * C4; + temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = input[3] * C4; - temp2 = input[13] * C12; - temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp2 *= C8; - intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = input[3] * C4; + temp2 = input[13] * C12; + temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 *= C8; + intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - intermediate[11] = input[15] - input[1]; - intermediate[12] = input[15] + input[1]; - intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + intermediate[11] = input[15] - input[1]; + intermediate[12] = input[15] + input[1]; + intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = input[11] * C12; - temp2 = input[5] * C4; - temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp2 *= C8; - intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = input[11] * C12; + temp2 = input[5] * C4; + temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp2 *= C8; + intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> 
RIGHT_SHIFT; - temp1 = input[11] * C4; - temp2 = input[5] * C12; - temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 *= C8; - intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = input[11] * C4; + temp2 = input[5] * C12; + temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 *= C8; + intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT; - step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; - step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING) - >> INITIAL_SHIFT; + step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; + step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING) + >> INITIAL_SHIFT; - // step 3 - output[0] = step[ 0] + step[ 3]; - output[1] = step[ 1] + step[ 2]; - output[2] = step[ 1] - step[ 2]; - output[3] = step[ 0] - step[ 3]; + // step 3 + output[0] = step[ 0] + step[ 3]; + output[1] = step[ 1] + step[ 2]; + output[2] = step[ 1] - step[ 2]; + output[3] = step[ 0] - step[ 3]; - temp1 = step[ 4] * C14; - temp2 = step[ 7] * C2; - output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = step[ 4] * C14; + temp2 = step[ 7] * C2; + output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = step[ 4] * C2; - temp2 = step[ 7] * C14; - output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = step[ 4] * C2; + temp2 = step[ 7] * C14; + output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = step[ 5] * C10; - temp2 = step[ 6] * C6; - output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = step[ 5] * C10; + temp2 = step[ 6] * C6; + output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = step[ 5] * C6; - temp2 = step[ 6] * C10; - output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = step[ 5] * C6; + temp2 = step[ 6] * C10; + output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - output[8] = step[ 8] + step[11]; - output[9] = step[ 9] + step[10]; - output[10] = step[ 9] - step[10]; - output[11] = step[ 8] - step[11]; - output[12] = step[12] + step[15]; - output[13] = step[13] + step[14]; - output[14] = step[13] - step[14]; - output[15] = step[12] - step[15]; + output[8] = step[ 8] + step[11]; + output[9] = step[ 9] + step[10]; + output[10] = step[ 9] - step[10]; + output[11] = step[ 8] - step[11]; + output[12] = step[12] + step[15]; + output[13] = step[13] + 
step[14]; + output[14] = step[13] - step[14]; + output[15] = step[12] - step[15]; - // output 4 - step[ 0] = output[0] + output[7]; - step[ 1] = output[1] + output[6]; - step[ 2] = output[2] + output[5]; - step[ 3] = output[3] + output[4]; - step[ 4] = output[3] - output[4]; - step[ 5] = output[2] - output[5]; - step[ 6] = output[1] - output[6]; - step[ 7] = output[0] - output[7]; + // output 4 + step[ 0] = output[0] + output[7]; + step[ 1] = output[1] + output[6]; + step[ 2] = output[2] + output[5]; + step[ 3] = output[3] + output[4]; + step[ 4] = output[3] - output[4]; + step[ 5] = output[2] - output[5]; + step[ 6] = output[1] - output[6]; + step[ 7] = output[0] - output[7]; - temp1 = output[8] * C7; - temp2 = output[15] * C9; - step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[8] * C7; + temp2 = output[15] * C9; + step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[9] * C11; - temp2 = output[14] * C5; - step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[9] * C11; + temp2 = output[14] * C5; + step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[10] * C3; - temp2 = output[13] * C13; - step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[10] * C3; + temp2 = output[13] * C13; + step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[11] * C15; - temp2 = output[12] * C1; - step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[11] * C15; + temp2 = output[12] * C1; + step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[11] * C1; - temp2 = output[12] * C15; - step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[11] * C1; + temp2 = output[12] * C15; + step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[10] * C13; - temp2 = output[13] * C3; - step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[10] * C13; + temp2 = output[13] * C3; + step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[9] * C5; - temp2 = output[14] * C11; - step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[9] * C5; + temp2 = output[14] * C11; + step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - temp1 = output[8] * C9; - temp2 = output[15] * C7; - step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; + temp1 = output[8] * C9; + temp2 = output[15] * C7; + step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT; - // step 5 - output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits; - output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits; - output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits; - output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits; - output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits; - output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits; - output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits; - output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits; + // step 5 + output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits; + output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits; + output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits; + output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits; + output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits; + output[5] = (step[5] + step[10] + last_rounding) >> 
last_shift_bits; + output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits; + output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits; - output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits; - output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits; - output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits; - output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits; - output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits; - output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits; - output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits; - output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits; + output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits; + output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits; + output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits; + output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits; + output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits; + output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits; + output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits; + output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits; } void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { - int16_t out[16 * 16]; - int16_t *outptr = &out[0]; - const int short_pitch = pitch >> 1; - int i, j; - int16_t temp_in[16], temp_out[16]; + int16_t out[16 * 16]; + int16_t *outptr = &out[0]; + const int short_pitch = pitch >> 1; + int i, j; + int16_t temp_in[16], temp_out[16]; - // First transform rows - for (i = 0; i < 16; ++i) { - butterfly_16x16_idct_1d(input, outptr, 0); - input += short_pitch; - outptr += 16; - } + // First transform rows + for (i = 0; i < 16; ++i) { + butterfly_16x16_idct_1d(input, outptr, 0); + input += short_pitch; + outptr += 16; + } - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - butterfly_16x16_idct_1d(temp_in, temp_out, 3); - for (j = 0; j < 16; ++j) + // Then transform columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + butterfly_16x16_idct_1d(temp_in, temp_out, 3); + for (j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j]; } } @@ -2120,16 +2121,20 @@ void vp9_short_idct32x32_c(short *input, short *output, int pitch) { } vp9_clear_system_state(); // Make it simd safe : __asm emms; } + #else // CONFIG_DWT32X32HYBRID -#define MAX_BLOCK_LENGTH 64 -#define ENH_PRECISION_BITS 1 -#define ENH_PRECISION_RND ((1 << ENH_PRECISION_BITS) / 2) +#define DWT_MAX_LENGTH 32 +#define DWT_TYPE 26 // 26/53/97 +#define DWT_PRECISION_BITS 2 +#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) + +#if DWT_TYPE == 53 // Note: block length must be even for this implementation static void synthesis_53_row(int length, short *lowpass, short *highpass, short *x) { - short r, * a, * b; + short r, *a, *b; int n; n = length >> 1; @@ -2148,13 +2153,13 @@ static void synthesis_53_row(int length, short *lowpass, short *highpass, *x++ = ((r = *a++) + 1) >> 1; *x++ = *b++ + ((r + (*a) + 2) >> 2); } - *x++ = ((r = *a) + 1)>>1; - *x++ = *b + ((r+1)>>1); + *x++ = ((r = *a) + 1) >> 1; + *x++ = *b + ((r + 1) >> 1); } static void synthesis_53_col(int length, short *lowpass, short *highpass, short *x) { - short r, * a, * b; + short r, *a, *b; int n; n = length >> 1; @@ -2170,19 +2175,18 @@ static void synthesis_53_col(int 
length, short *lowpass, short *highpass, b = highpass; a = lowpass; while (--n) { - *x++ = r = *a++; + r = *a++; + *x++ = r; *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1); } - *x++ = r = *a; - *x++ = ((*b) << 1) + r; + *x++ = *a; + *x++ = ((*b) << 1) + *a; } -// NOTE: Using a 5/3 integer wavelet for now. Explore using a wavelet -// with a better response later -void dyadic_synthesize(int levels, int width, int height, short *c, int pitch_c, - short *x, int pitch_x) { +void dyadic_synthesize_53(int levels, int width, int height, short *c, + int pitch_c, short *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * MAX_BLOCK_LENGTH]; + short buffer[2 * DWT_MAX_LENGTH]; th[0] = hh; tw[0] = hw; @@ -2204,35 +2208,466 @@ void dyadic_synthesize(int levels, int width, int height, short *c, int pitch_c, c[i * pitch_c + j] = buffer[i + nh]; } for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); + memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer)); synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]); } } + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ? + ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) : + -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS); + } + } +} + +#elif DWT_TYPE == 26 + +// Note: block length must be even for this implementation +static void synthesis_26_row(int length, short *lowpass, short *highpass, + short *x) { + short r, s, *a, *b; + int i, n = length >> 1; + + if (n >= 4) { + a = lowpass; + b = highpass; + r = *lowpass; + while (--n) { + *b++ += (r - a[1] + 4) >> 3; + r = *a++; + } + *b += (r - *a + 4) >> 3; + } + a = lowpass; + b = highpass; + for (i = length >> 1; i; i--) { + s = *b++; + r = *a++; + *x++ = (r + s + 1) >> 1; + *x++ = (r - s + 1) >> 1; + } +} + +static void synthesis_26_col(int length, short *lowpass, short *highpass, + short *x) { + short r, s, *a, *b; + int i, n = length >> 1; + + if (n >= 4) { + a = lowpass; + b = highpass; + r = *lowpass; + while (--n) { + *b++ += (r - a[1] + 4) >> 3; + r = *a++; + } + *b += (r - *a + 4) >> 3; + } + a = lowpass; + b = highpass; + for (i = length >> 1; i; i--) { + s = *b++; + r = *a++; + *x++ = r + s; + *x++ = r - s; + } +} + +void dyadic_synthesize_26(int levels, int width, int height, short *c, + int pitch_c, short *x, int pitch_x) { + int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; + short buffer[2 * DWT_MAX_LENGTH]; + + th[0] = hh; + tw[0] = hw; + for (i = 1; i <= levels; i++) { + th[i] = (th[i - 1] + 1) >> 1; + tw[i] = (tw[i - 1] + 1) >> 1; + } + for (lv = levels - 1; lv >= 0; lv--) { + nh = th[lv]; + nw = tw[lv]; + hh = th[lv + 1]; + hw = tw[lv + 1]; + if ((nh < 2) || (nw < 2)) continue; + for (j = 0; j < nw; j++) { + for (i = 0; i < nh; i++) + buffer[i] = c[i * pitch_c + j]; + synthesis_26_col(nh, buffer, buffer + hh, buffer + nh); + for (i = 0; i < nh; i++) + c[i * pitch_c + j] = buffer[i + nh]; + } + for (i = 0; i < nh; i++) { + memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer)); + synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]); + } + } + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ? 
+ ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) : + -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS); + } + } +} + +#elif DWT_TYPE == 97 + +static void synthesis_97(int length, double *lowpass, double *highpass, + double *x) { + static const double a_predict1 = -1.586134342; + static const double a_update1 = -0.05298011854; + static const double a_predict2 = 0.8829110762; + static const double a_update2 = 0.4435068522; + static const double s_low = 1.149604398; + static const double s_high = 1/1.149604398; + static const double inv_s_low = 1 / s_low; + static const double inv_s_high = 1 / s_high; + int i; + double y[DWT_MAX_LENGTH]; + // Undo pack and scale + for (i = 0; i < length / 2; i++) { + y[i * 2] = lowpass[i] * inv_s_low; + y[i * 2 + 1] = highpass[i] * inv_s_high; + } + memcpy(x, y, sizeof(*y) * length); + // Undo update 2 + for (i = 2; i < length; i += 2) { + x[i] -= a_update2 * (x[i-1] + x[i+1]); + } + x[0] -= 2 * a_update2 * x[1]; + // Undo predict 2 + for (i = 1; i < length - 2; i += 2) { + x[i] -= a_predict2 * (x[i - 1] + x[i + 1]); + } + x[length - 1] -= 2 * a_predict2 * x[length - 2]; + // Undo update 1 + for (i = 2; i < length; i += 2) { + x[i] -= a_update1 * (x[i - 1] + x[i + 1]); + } + x[0] -= 2 * a_update1 * x[1]; + // Undo predict 1 + for (i = 1; i < length - 2; i += 2) { + x[i] -= a_predict1 * (x[i - 1] + x[i + 1]); + } + x[length - 1] -= 2 * a_predict1 * x[length - 2]; +} + +void dyadic_synthesize_97(int levels, int width, int height, short *c, + int pitch_c, short *x, int pitch_x) { + int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; + double buffer[2 * DWT_MAX_LENGTH]; + double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; + + th[0] = hh; + tw[0] = hw; + for (i = 1; i <= levels; i++) { + th[i] = (th[i - 1] + 1) >> 1; + tw[i] = (tw[i - 1] + 1) >> 1; + } + for (lv = levels - 1; lv >= 0; lv--) { + nh = th[lv]; + nw = tw[lv]; + hh = th[lv + 1]; + hw = tw[lv + 1]; + if ((nh < 2) || (nw < 2)) continue; + for (j = 0; j < nw; j++) { + for (i = 0; i < nh; i++) + buffer[i] = c[i * pitch_c + j]; + synthesis_97(nh, buffer, buffer + hh, buffer + nh); + for (i = 0; i < nh; i++) + y[i * DWT_MAX_LENGTH + j] = buffer[i + nh]; + } + for (i = 0; i < nh; i++) { + memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer)); + synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]); + } + } for (i = 0; i < height; i++) for (j = 0; j < width; j++) - x[i * pitch_x + j] = (c[i * pitch_c + j] + ENH_PRECISION_RND) >> - ENH_PRECISION_BITS; + x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] / + (1 << DWT_PRECISION_BITS)); +} + +#endif // DWT_TYPE + +// TODO(debargha): Implement scaling differently so as not to have to use the +// floating point 16x16 dct +static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) { + static const double C1 = 0.995184726672197; + static const double C2 = 0.98078528040323; + static const double C3 = 0.956940335732209; + static const double C4 = 0.923879532511287; + static const double C5 = 0.881921264348355; + static const double C6 = 0.831469612302545; + static const double C7 = 0.773010453362737; + static const double C8 = 0.707106781186548; + static const double C9 = 0.634393284163646; + static const double C10 = 0.555570233019602; + static const double C11 = 0.471396736825998; + static const double C12 = 0.38268343236509; + static const double C13 = 0.290284677254462; + static const double C14 = 0.195090322016128; + static const double C15 = 0.098017140329561; + + vp9_clear_system_state(); // 
Make it simd safe : __asm emms; + { + double step[16]; + double intermediate[16]; + double temp1, temp2; + + + // step 1 and 2 + step[ 0] = input[0] + input[8]; + step[ 1] = input[0] - input[8]; + + temp1 = input[4]*C12; + temp2 = input[12]*C4; + + temp1 -= temp2; + temp1 *= C8; + + step[ 2] = 2*(temp1); + + temp1 = input[4]*C4; + temp2 = input[12]*C12; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + step[ 3] = 2*(temp1); + + temp1 = input[2]*C8; + temp1 = 2*(temp1); + temp2 = input[6] + input[10]; + + step[ 4] = temp1 + temp2; + step[ 5] = temp1 - temp2; + + temp1 = input[14]*C8; + temp1 = 2*(temp1); + temp2 = input[6] - input[10]; + + step[ 6] = temp2 - temp1; + step[ 7] = temp2 + temp1; + + // for odd input + temp1 = input[3]*C12; + temp2 = input[13]*C4; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + intermediate[ 8] = 2*(temp1); + + temp1 = input[3]*C4; + temp2 = input[13]*C12; + temp2 -= temp1; + temp2 = (temp2); + temp2 *= C8; + intermediate[ 9] = 2*(temp2); + + intermediate[10] = 2*(input[9]*C8); + intermediate[11] = input[15] - input[1]; + intermediate[12] = input[15] + input[1]; + intermediate[13] = 2*((input[7]*C8)); + + temp1 = input[11]*C12; + temp2 = input[5]*C4; + temp2 -= temp1; + temp2 = (temp2); + temp2 *= C8; + intermediate[14] = 2*(temp2); + + temp1 = input[11]*C4; + temp2 = input[5]*C12; + temp1 += temp2; + temp1 = (temp1); + temp1 *= C8; + intermediate[15] = 2*(temp1); + + step[ 8] = intermediate[ 8] + intermediate[14]; + step[ 9] = intermediate[ 9] + intermediate[15]; + step[10] = intermediate[10] + intermediate[11]; + step[11] = intermediate[10] - intermediate[11]; + step[12] = intermediate[12] + intermediate[13]; + step[13] = intermediate[12] - intermediate[13]; + step[14] = intermediate[ 8] - intermediate[14]; + step[15] = intermediate[ 9] - intermediate[15]; + + // step 3 + output[0] = step[ 0] + step[ 3]; + output[1] = step[ 1] + step[ 2]; + output[2] = step[ 1] - step[ 2]; + output[3] = step[ 0] - step[ 3]; + + temp1 = step[ 4]*C14; + temp2 = step[ 7]*C2; + temp1 -= temp2; + output[4] = (temp1); + + temp1 = step[ 4]*C2; + temp2 = step[ 7]*C14; + temp1 += temp2; + output[7] = (temp1); + + temp1 = step[ 5]*C10; + temp2 = step[ 6]*C6; + temp1 -= temp2; + output[5] = (temp1); + + temp1 = step[ 5]*C6; + temp2 = step[ 6]*C10; + temp1 += temp2; + output[6] = (temp1); + + output[8] = step[ 8] + step[11]; + output[9] = step[ 9] + step[10]; + output[10] = step[ 9] - step[10]; + output[11] = step[ 8] - step[11]; + output[12] = step[12] + step[15]; + output[13] = step[13] + step[14]; + output[14] = step[13] - step[14]; + output[15] = step[12] - step[15]; + + // output 4 + step[ 0] = output[0] + output[7]; + step[ 1] = output[1] + output[6]; + step[ 2] = output[2] + output[5]; + step[ 3] = output[3] + output[4]; + step[ 4] = output[3] - output[4]; + step[ 5] = output[2] - output[5]; + step[ 6] = output[1] - output[6]; + step[ 7] = output[0] - output[7]; + + temp1 = output[8]*C7; + temp2 = output[15]*C9; + temp1 -= temp2; + step[ 8] = (temp1); + + temp1 = output[9]*C11; + temp2 = output[14]*C5; + temp1 += temp2; + step[ 9] = (temp1); + + temp1 = output[10]*C3; + temp2 = output[13]*C13; + temp1 -= temp2; + step[10] = (temp1); + + temp1 = output[11]*C15; + temp2 = output[12]*C1; + temp1 += temp2; + step[11] = (temp1); + + temp1 = output[11]*C1; + temp2 = output[12]*C15; + temp2 -= temp1; + step[12] = (temp2); + + temp1 = output[10]*C13; + temp2 = output[13]*C3; + temp1 += temp2; + step[13] = (temp1); + + temp1 = output[9]*C5; + temp2 = output[14]*C11; + temp2 -= 
temp1; + step[14] = (temp2); + + temp1 = output[8]*C9; + temp2 = output[15]*C7; + temp1 += temp2; + step[15] = (temp1); + + // step 5 + output[0] = (step[0] + step[15]); + output[1] = (step[1] + step[14]); + output[2] = (step[2] + step[13]); + output[3] = (step[3] + step[12]); + output[4] = (step[4] + step[11]); + output[5] = (step[5] + step[10]); + output[6] = (step[6] + step[ 9]); + output[7] = (step[7] + step[ 8]); + + output[15] = (step[0] - step[15]); + output[14] = (step[1] - step[14]); + output[13] = (step[2] - step[13]); + output[12] = (step[3] - step[12]); + output[11] = (step[4] - step[11]); + output[10] = (step[5] - step[10]); + output[9] = (step[6] - step[ 9]); + output[8] = (step[7] - step[ 8]); + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} + +void vp9_short_idct16x16_c_f(short *input, short *output, int pitch) { + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + double out[16*16], out2[16*16]; + const int short_pitch = pitch >> 1; + int i, j; + // First transform rows + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = input[j + i*short_pitch]; + butterfly_16x16_idct_1d_f(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out[j + i*16] = temp_out[j]; + } + // Then transform columns + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = out[j*16 + i]; + butterfly_16x16_idct_1d_f(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out2[j*16 + i] = temp_out[j]; + } + for (i = 0; i < 16*16; ++i) + output[i] = round(out2[i] / (64 >> DWT_PRECISION_BITS)); + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; } void vp9_short_idct32x32_c(short *input, short *output, int pitch) { // assume out is a 32x32 buffer + // Temporary buffer to hold a 16x16 block for 16x16 inverse dct short buffer[16 * 16]; + // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt short buffer2[32 * 32]; + // Note: pitch is in bytes, short_pitch is in short units const int short_pitch = pitch >> 1; int i; + // TODO(debargha): Implement more efficiently by adding output pitch // argument to the idct16x16 function - vp9_short_idct16x16_c(input, buffer, pitch); + vp9_short_idct16x16_c_f(input, buffer, pitch); for (i = 0; i < 16; ++i) { - vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(short) * 16); - vpx_memcpy(buffer2 + i * 32 + 16, input + i * short_pitch + 16, - sizeof(short) * 16); + vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); } - for (; i < 32; ++i) { - vpx_memcpy(buffer2 + i * 32, input + i * short_pitch, - sizeof(short) * 32); + vp9_short_idct16x16_c_f(input + 16, buffer, pitch); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16); } - dyadic_synthesize(1, 32, 32, buffer2, 32, output, 32); + vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16, + sizeof(*buffer2) * 16); + } + vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16, + sizeof(*buffer2) * 16); + } +#if DWT_TYPE == 26 + dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 97 + dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 53 + dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); +#endif } #endif // CONFIG_DWT32X32HYBRID #endif // 
CONFIG_TX32X32 diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 0fc8fa35e..90baafe53 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -902,23 +902,24 @@ void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) { #define TEST_INT_16x16_DCT 1 #if !TEST_INT_16x16_DCT -static const double C1 = 0.995184726672197; -static const double C2 = 0.98078528040323; -static const double C3 = 0.956940335732209; -static const double C4 = 0.923879532511287; -static const double C5 = 0.881921264348355; -static const double C6 = 0.831469612302545; -static const double C7 = 0.773010453362737; -static const double C8 = 0.707106781186548; -static const double C9 = 0.634393284163646; -static const double C10 = 0.555570233019602; -static const double C11 = 0.471396736825998; -static const double C12 = 0.38268343236509; -static const double C13 = 0.290284677254462; -static const double C14 = 0.195090322016128; -static const double C15 = 0.098017140329561; static void dct16x16_1d(double input[16], double output[16]) { + static const double C1 = 0.995184726672197; + static const double C2 = 0.98078528040323; + static const double C3 = 0.956940335732209; + static const double C4 = 0.923879532511287; + static const double C5 = 0.881921264348355; + static const double C6 = 0.831469612302545; + static const double C7 = 0.773010453362737; + static const double C8 = 0.707106781186548; + static const double C9 = 0.634393284163646; + static const double C10 = 0.555570233019602; + static const double C11 = 0.471396736825998; + static const double C12 = 0.38268343236509; + static const double C13 = 0.290284677254462; + static const double C14 = 0.195090322016128; + static const double C15 = 0.098017140329561; + vp9_clear_system_state(); // Make it simd safe : __asm emms; { double step[16]; @@ -1686,15 +1687,18 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { #else // CONFIG_DWT32X32HYBRID -#define MAX_BLOCK_LENGTH 64 -#define ENH_PRECISION_BITS 1 -#define ENH_PRECISION_RND ((1 << ENH_PRECISION_BITS) / 2) +#define DWT_MAX_LENGTH 64 +#define DWT_TYPE 26 // 26/53/97 +#define DWT_PRECISION_BITS 2 +#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) + +#if DWT_TYPE == 53 // Note: block length must be even for this implementation static void analysis_53_row(int length, short *x, short *lowpass, short *highpass) { int n; - short r, * a, * b; + short r, *a, *b; n = length >> 1; b = highpass; @@ -1720,7 +1724,7 @@ static void analysis_53_row(int length, short *x, static void analysis_53_col(int length, short *x, short *lowpass, short *highpass) { int n; - short r, * a, * b; + short r, *a, *b; n = length >> 1; b = highpass; @@ -1743,15 +1747,13 @@ static void analysis_53_col(int length, short *x, } } -// NOTE: Using a 5/3 integer wavelet for now. 
Explore using a wavelet -// with a better response later -static void dyadic_analyze(int levels, int width, int height, - short *x, int pitch_x, short *c, int pitch_c) { +static void dyadic_analyze_53(int levels, int width, int height, + short *x, int pitch_x, short *c, int pitch_c) { int lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * MAX_BLOCK_LENGTH]; + short buffer[2 * DWT_MAX_LENGTH]; for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { - c[i * pitch_c + j] = x[i * pitch_x + j] << ENH_PRECISION_BITS; + c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; } } for (lv = 0; lv < levels; lv++) { @@ -1774,17 +1776,398 @@ static void dyadic_analyze(int levels, int width, int height, } } +#elif DWT_TYPE == 26 + +static void analysis_26_row(int length, short *x, + short *lowpass, short *highpass) { + int i, n; + short r, s, *a, *b; + a = lowpass; + b = highpass; + for (i = length >> 1; i; i--) { + r = *x++; + s = *x++; + *a++ = r + s; + *b++ = r - s; + } + n = length >> 1; + if (n >= 4) { + a = lowpass; + b = highpass; + r = *lowpass; + while (--n) { + *b++ -= (r - a[1] + 4) >> 3; + r = *a++; + } + *b -= (r - *a + 4) >> 3; + } +} + +static void analysis_26_col(int length, short *x, + short *lowpass, short *highpass) { + int i, n; + short r, s, *a, *b; + a = lowpass; + b = highpass; + for (i = length >> 1; i; i--) { + r = *x++; + s = *x++; + *a++ = (r + s + 1) >> 1; + *b++ = (r - s + 1) >> 1; + } + n = length >> 1; + if (n >= 4) { + a = lowpass; + b = highpass; + r = *lowpass; + while (--n) { + *b++ -= (r - a[1] + 4) >> 3; + r = *a++; + } + *b -= (r - *a + 4) >> 3; + } +} + +static void dyadic_analyze_26(int levels, int width, int height, + short *x, int pitch_x, short *c, int pitch_c) { + int lv, i, j, nh, nw, hh = height, hw = width; + short buffer[2 * DWT_MAX_LENGTH]; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; + } + } + for (lv = 0; lv < levels; lv++) { + nh = hh; + hh = (hh + 1) >> 1; + nw = hw; + hw = (hw + 1) >> 1; + if ((nh < 2) || (nw < 2)) return; + for (i = 0; i < nh; i++) { + memcpy(buffer, &c[i * pitch_c], nw * sizeof(short)); + analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); + } + for (j = 0; j < nw; j++) { + for (i = 0; i < nh; i++) + buffer[i + nh] = c[i * pitch_c + j]; + analysis_26_col(nh, buffer + nh, buffer, buffer + hh); + for (i = 0; i < nh; i++) + c[i * pitch_c + j] = buffer[i]; + } + } +} + +#elif DWT_TYPE == 97 + +static void analysis_97(int length, double *x, + double *lowpass, double *highpass) { + static const double a_predict1 = -1.586134342; + static const double a_update1 = -0.05298011854; + static const double a_predict2 = 0.8829110762; + static const double a_update2 = 0.4435068522; + static const double s_low = 1.149604398; + static const double s_high = 1/1.149604398; + int i; + double y[DWT_MAX_LENGTH]; + // Predict 1 + for (i = 1; i < length - 2; i += 2) { + x[i] += a_predict1 * (x[i - 1] + x[i + 1]); + } + x[length - 1] += 2 * a_predict1 * x[length - 2]; + // Update 1 + for (i = 2; i < length; i += 2) { + x[i] += a_update1 * (x[i - 1] + x[i + 1]); + } + x[0] += 2 * a_update1 * x[1]; + // Predict 2 + for (i = 1; i < length - 2; i += 2) { + x[i] += a_predict2 * (x[i - 1] + x[i + 1]); + } + x[length - 1] += 2 * a_predict2 * x[length - 2]; + // Update 2 + for (i = 2; i < length; i += 2) { + x[i] += a_update2 * (x[i - 1] + x[i + 1]); + } + x[0] += 2 * a_update2 * x[1]; + memcpy(y, x, sizeof(*y) * length); + // Scale and pack 
+ for (i = 0; i < length / 2; i++) { + lowpass[i] = y[2 * i] * s_low; + highpass[i] = y[2 * i + 1] * s_high; + } +} + +static void dyadic_analyze_97(int levels, int width, int height, + short *x, int pitch_x, short *c, int pitch_c) { + int lv, i, j, nh, nw, hh = height, hw = width; + double buffer[2 * DWT_MAX_LENGTH]; + double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS; + } + } + for (lv = 0; lv < levels; lv++) { + nh = hh; + hh = (hh + 1) >> 1; + nw = hw; + hw = (hw + 1) >> 1; + if ((nh < 2) || (nw < 2)) return; + for (i = 0; i < nh; i++) { + memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer)); + analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH], + &y[i * DWT_MAX_LENGTH] + hw); + } + for (j = 0; j < nw; j++) { + for (i = 0; i < nh; i++) + buffer[i + nh] = y[i * DWT_MAX_LENGTH + j]; + analysis_97(nh, buffer + nh, buffer, buffer + hh); + for (i = 0; i < nh; i++) + c[i * pitch_c + j] = round(buffer[i]); + } + } +} + +#endif // DWT_TYPE + +// TODO(debargha): Implement the scaling differently so as not to have to +// use the floating point dct +static void dct16x16_1d_f(double input[16], double output[16]) { + static const double C1 = 0.995184726672197; + static const double C2 = 0.98078528040323; + static const double C3 = 0.956940335732209; + static const double C4 = 0.923879532511287; + static const double C5 = 0.881921264348355; + static const double C6 = 0.831469612302545; + static const double C7 = 0.773010453362737; + static const double C8 = 0.707106781186548; + static const double C9 = 0.634393284163646; + static const double C10 = 0.555570233019602; + static const double C11 = 0.471396736825998; + static const double C12 = 0.38268343236509; + static const double C13 = 0.290284677254462; + static const double C14 = 0.195090322016128; + static const double C15 = 0.098017140329561; + + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + double step[16]; + double intermediate[16]; + double temp1, temp2; + + // step 1 + step[ 0] = input[0] + input[15]; + step[ 1] = input[1] + input[14]; + step[ 2] = input[2] + input[13]; + step[ 3] = input[3] + input[12]; + step[ 4] = input[4] + input[11]; + step[ 5] = input[5] + input[10]; + step[ 6] = input[6] + input[ 9]; + step[ 7] = input[7] + input[ 8]; + step[ 8] = input[7] - input[ 8]; + step[ 9] = input[6] - input[ 9]; + step[10] = input[5] - input[10]; + step[11] = input[4] - input[11]; + step[12] = input[3] - input[12]; + step[13] = input[2] - input[13]; + step[14] = input[1] - input[14]; + step[15] = input[0] - input[15]; + + // step 2 + output[0] = step[0] + step[7]; + output[1] = step[1] + step[6]; + output[2] = step[2] + step[5]; + output[3] = step[3] + step[4]; + output[4] = step[3] - step[4]; + output[5] = step[2] - step[5]; + output[6] = step[1] - step[6]; + output[7] = step[0] - step[7]; + + temp1 = step[ 8]*C7; + temp2 = step[15]*C9; + output[ 8] = temp1 + temp2; + + temp1 = step[ 9]*C11; + temp2 = step[14]*C5; + output[ 9] = temp1 - temp2; + + temp1 = step[10]*C3; + temp2 = step[13]*C13; + output[10] = temp1 + temp2; + + temp1 = step[11]*C15; + temp2 = step[12]*C1; + output[11] = temp1 - temp2; + + temp1 = step[11]*C1; + temp2 = step[12]*C15; + output[12] = temp2 + temp1; + + temp1 = step[10]*C13; + temp2 = step[13]*C3; + output[13] = temp2 - temp1; + + temp1 = step[ 9]*C5; + temp2 = step[14]*C11; + output[14] = temp2 + temp1; + + temp1 = step[ 8]*C9; + temp2 = step[15]*C7; + output[15] 
= temp2 - temp1; + + // step 3 + step[ 0] = output[0] + output[3]; + step[ 1] = output[1] + output[2]; + step[ 2] = output[1] - output[2]; + step[ 3] = output[0] - output[3]; + + temp1 = output[4]*C14; + temp2 = output[7]*C2; + step[ 4] = temp1 + temp2; + + temp1 = output[5]*C10; + temp2 = output[6]*C6; + step[ 5] = temp1 + temp2; + + temp1 = output[5]*C6; + temp2 = output[6]*C10; + step[ 6] = temp2 - temp1; + + temp1 = output[4]*C2; + temp2 = output[7]*C14; + step[ 7] = temp2 - temp1; + + step[ 8] = output[ 8] + output[11]; + step[ 9] = output[ 9] + output[10]; + step[10] = output[ 9] - output[10]; + step[11] = output[ 8] - output[11]; + + step[12] = output[12] + output[15]; + step[13] = output[13] + output[14]; + step[14] = output[13] - output[14]; + step[15] = output[12] - output[15]; + + // step 4 + output[ 0] = (step[ 0] + step[ 1]); + output[ 8] = (step[ 0] - step[ 1]); + + temp1 = step[2]*C12; + temp2 = step[3]*C4; + temp1 = temp1 + temp2; + output[ 4] = 2*(temp1*C8); + + temp1 = step[2]*C4; + temp2 = step[3]*C12; + temp1 = temp2 - temp1; + output[12] = 2*(temp1*C8); + + output[ 2] = 2*((step[4] + step[ 5])*C8); + output[14] = 2*((step[7] - step[ 6])*C8); + + temp1 = step[4] - step[5]; + temp2 = step[6] + step[7]; + output[ 6] = (temp1 + temp2); + output[10] = (temp1 - temp2); + + intermediate[8] = step[8] + step[14]; + intermediate[9] = step[9] + step[15]; + + temp1 = intermediate[8]*C12; + temp2 = intermediate[9]*C4; + temp1 = temp1 - temp2; + output[3] = 2*(temp1*C8); + + temp1 = intermediate[8]*C4; + temp2 = intermediate[9]*C12; + temp1 = temp2 + temp1; + output[13] = 2*(temp1*C8); + + output[ 9] = 2*((step[10] + step[11])*C8); + + intermediate[11] = step[10] - step[11]; + intermediate[12] = step[12] + step[13]; + intermediate[13] = step[12] - step[13]; + intermediate[14] = step[ 8] - step[14]; + intermediate[15] = step[ 9] - step[15]; + + output[15] = (intermediate[11] + intermediate[12]); + output[ 1] = -(intermediate[11] - intermediate[12]); + + output[ 7] = 2*(intermediate[13]*C8); + + temp1 = intermediate[14]*C12; + temp2 = intermediate[15]*C4; + temp1 = temp1 - temp2; + output[11] = -2*(temp1*C8); + + temp1 = intermediate[14]*C4; + temp2 = intermediate[15]*C12; + temp1 = temp2 + temp1; + output[ 5] = 2*(temp1*C8); + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} + +void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch) { + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + int shortpitch = pitch >> 1; + int i, j; + double output[256]; + // First transform columns + for (i = 0; i < 16; i++) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; j++) + temp_in[j] = input[j*shortpitch + i]; + dct16x16_1d_f(temp_in, temp_out); + for (j = 0; j < 16; j++) + output[j*16 + i] = temp_out[j]; + } + // Then transform rows + for (i = 0; i < 16; ++i) { + double temp_in[16], temp_out[16]; + for (j = 0; j < 16; ++j) + temp_in[j] = output[j + i*16]; + dct16x16_1d_f(temp_in, temp_out); + for (j = 0; j < 16; ++j) + output[j + i*16] = temp_out[j]; + } + // Scale by some magic number + for (i = 0; i < 256; i++) + out[i] = (short)round(output[i] / (4 << DWT_PRECISION_BITS)); + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} + void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { // assume out is a 32x32 buffer short buffer[16 * 16]; int i; const int short_pitch = pitch >> 1; - dyadic_analyze(1, 32, 32, input, short_pitch, out, 32); +#if DWT_TYPE == 26 + dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 
32); +#elif DWT_TYPE == 97 + dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32); +#elif DWT_TYPE == 53 + dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32); +#endif // TODO(debargha): Implement more efficiently by adding output pitch // argument to the dct16x16 function - vp9_short_fdct16x16_c(out, buffer, 64); + vp9_short_fdct16x16_c_f(out, buffer, 64); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); + + vp9_short_fdct16x16_c_f(out + 16, buffer, 64); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16); + + vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16); + + vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16); } #endif // CONFIG_DWT32X32HYBRID #endif // CONFIG_TX32X32 From 7fa3deb1f5a4d131e590b871bc2012f54bb5379b Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Thu, 13 Dec 2012 12:18:38 -0800 Subject: [PATCH 20/77] Build fixes with the superblocks and 32x32 experiments Change-Id: I3c751f8d57ac7d3b754476dc6ce144d162534e6d --- vp9/decoder/vp9_detokenize.c | 4 ++-- vp9/encoder/vp9_rdopt.c | 4 ++-- vp9/encoder/vp9_tokenize.c | 22 +++++++++------------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 0c822d429..1e3608fb2 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -270,8 +270,8 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, // 16x16 chroma blocks seg_eob = get_eob(xd, segment_id, 256); for (i = 16; i < 24; i += 4) { - ENTROPY_CONTEXT* const a = A + vp9_block2above_8x8[i]; - ENTROPY_CONTEXT* const l = L + vp9_block2left_8x8[i]; + ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_16X16][i]; + ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_16X16][i]; eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, DCT_DCT, seg_eob, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 29fafc08b..9df59861e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1762,8 +1762,8 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x) { for (b = 16; b < 24; b += 4) cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV, - ta + vp9_block2above_8x8[b], - tl + vp9_block2left_8x8[b], TX_16X16); + ta + vp9_block2above[TX_8X8][b], + tl + vp9_block2left[TX_8X8][b], TX_16X16); return cost; } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 6ff76606f..c8d1d91fc 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -370,17 +370,16 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if (!dry_run) cpi->skip_false_count[mb_skip_context] += skip_inc; - tokenize_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, - A[0], L[0], TX_32X32, dry_run); + tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, + TX_32X32, dry_run); A[0][1] = A[0][2] = A[0][3] = A[0][0]; L[0][1] = L[0][2] = L[0][3] = L[0][0]; for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A[0] + vp9_block2above_8x8[b], L[0] + vp9_block2left_8x8[b], + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - A[0][vp9_block2above_8x8[b] + 1] = A[0][vp9_block2above_8x8[b]]; - L[0][vp9_block2left_8x8[b] + 1] = L[0][vp9_block2left_8x8[b]]; + A[0][vp9_block2above[TX_16X16][b] + 1] = A[0][vp9_block2above[TX_16X16][b]]; + L[0][vp9_block2left[TX_16X16][b] + 1] =
L[0][vp9_block2left[TX_16X16][b]]; } vpx_memset(&A[0][8], 0, sizeof(A[0][8])); vpx_memset(&L[0][8], 0, sizeof(L[0][8])); @@ -908,16 +907,13 @@ static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, (ENTROPY_CONTEXT *) (xd->left_context + 1), }; int b; - stuff_b(cpi, xd, xd->block, t, PLANE_TYPE_Y_WITH_DC, - A[0], L[0], TX_32X32, dry_run); + stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); A[0][1] = A[0][2] = A[0][3] = A[0][0]; L[0][1] = L[0][2] = L[0][3] = L[0][0]; for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, xd->block + b, t, PLANE_TYPE_UV, - A[0] + vp9_block2above_8x8[b], - L[0] + vp9_block2above_8x8[b], TX_16X16, dry_run); - A[0][vp9_block2above_8x8[b] + 1] = A[0][vp9_block2above_8x8[b]]; - L[0][vp9_block2left_8x8[b] + 1] = L[0][vp9_block2left_8x8[b]]; + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + A[0][vp9_block2above[TX_16X16][b] + 1] = A[0][vp9_block2above[TX_16X16][b]]; + L[0][vp9_block2left[TX_16X16][b] + 1] = L[0][vp9_block2left[TX_16X16][b]]; } vpx_memset(&A[0][8], 0, sizeof(A[0][8])); vpx_memset(&L[0][8], 0, sizeof(L[0][8])); From c681887652abbf750f72f11e35d37a86f42e1fb8 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 13 Dec 2012 14:51:27 -0800 Subject: [PATCH 21/77] fixed build issue with round() not defined in MSVC Change-Id: I8fe8462a0c2f636d8b43c0243832ca67578f3665 --- test/dct32x32_test.cc | 8 ++++++++ vp9/common/vp9_systemdependent.h | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 8f089c683..1ac6fb0a9 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -27,6 +27,14 @@ extern "C" { using libvpx_test::ACMRandom; namespace { +#ifdef _MSC_VER +static int round(double x) { + if (x < 0) + return (int)ceil(x - 0.5); + else + return (int)floor(x + 0.5); +} +#endif #if !CONFIG_DWT32X32HYBRID static const double kPi = 3.141592653589793238462643383279502884; diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h index 91a50607a..5bc6237e6 100644 --- a/vp9/common/vp9_systemdependent.h +++ b/vp9/common/vp9_systemdependent.h @@ -10,6 +10,10 @@ #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ +#ifdef _MSC_VER +#include <math.h> +#endif + #include "vpx_ports/config.h" #if ARCH_X86 || ARCH_X86_64 void vpx_reset_mmx_state(void); @@ -18,6 +22,16 @@ void vpx_reset_mmx_state(void); #define vp9_clear_system_state() #endif +#ifdef _MSC_VER +// round is not defined in MSVC +static int round(double x) { + if (x < 0) + return (int)ceil(x - 0.5); + else + return (int)floor(x + 0.5); +} +#endif + struct VP9Common; void vp9_machine_specific_config(struct VP9Common *); #endif From 2b9ec585d6469b4a8031f3326039bfdc2894ccff Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 13 Dec 2012 14:58:30 -0800 Subject: [PATCH 22/77] fixed an encoder/decoder mismatch The mismatch was caused by an improper merge of cleanup code around tokenize_b() and stuff_b() with the TX32X32 experiment.
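In the TX32X32 superblock path a 16x16 chroma transform only owns two 4x4 entropy-context slots per plane, so flattening four slots the way the luma path does writes into a neighbouring plane's context, and the encoder and decoder stop agreeing on coefficient probabilities. A minimal sketch of the corrected update, mirroring the tokenize_b()/stuff_b() hunks below (names as in vp9_tokenize.c):

    if (tx_size == TX_16X16) {
      if (type != PLANE_TYPE_UV) {
        /* luma: one 16x16 transform covers four 4x4 context slots */
        a[1] = a[2] = a[3] = a_ec;
        l[1] = l[2] = l[3] = l_ec;
      } else {
        /* chroma of a 32x32 superblock: only two slots per plane */
        a[1] = a_ec;
        l[1] = l_ec;
      }
    }

The decoder flattens its contexts the same way, so both sides must take the identical branch for the token probabilities to stay in sync.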
Change-Id: I225ae62f015983751f017386548d9c988c30664c --- vp9/encoder/vp9_tokenize.c | 53 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index c8d1d91fc..2d6138114 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -173,8 +173,10 @@ static void tokenize_b(VP9_COMP *cpi, break; case TX_16X16: #if CONFIG_CNVCONTEXT - a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; + if (type != PLANE_TYPE_UV) { + a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; + } #endif seg_eob = 256; bands = vp9_coef_bands_16x16; @@ -246,8 +248,15 @@ static void tokenize_b(VP9_COMP *cpi, a[1] = a_ec; l[1] = l_ec; } else if (tx_size == TX_16X16) { - a[1] = a[2] = a[3] = a_ec; - l[1] = l[2] = l[3] = l_ec; + if (type != PLANE_TYPE_UV) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else { + a[1] = a_ec; + l[1] = l_ec; +#endif + } } } @@ -378,8 +387,6 @@ void vp9_tokenize_sb(VP9_COMP *cpi, for (b = 16; b < 24; b += 4) { tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - A[0][vp9_block2above[TX_16X16][b] + 1] = A[0][vp9_block2above[TX_16X16][b]]; - L[0][vp9_block2left[TX_16X16][b] + 1] = L[0][vp9_block2left[TX_16X16][b]]; } vpx_memset(&A[0][8], 0, sizeof(A[0][8])); vpx_memset(&L[0][8], 0, sizeof(L[0][8])); @@ -749,8 +756,10 @@ static __inline void stuff_b(VP9_COMP *cpi, break; case TX_16X16: #if CONFIG_CNVCONTEXT - a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; + if (type != PLANE_TYPE_UV) { + a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; + } #endif bands = vp9_coef_bands_16x16; if (tx_type != DCT_DCT) { @@ -783,8 +792,15 @@ static __inline void stuff_b(VP9_COMP *cpi, a[1] = 0; l[1] = 0; } else if (tx_size == TX_16X16) { - a[1] = a[2] = a[3] = 0; - l[1] = l[2] = l[3] = 0; + if (type != PLANE_TYPE_UV) { + a[1] = a[2] = a[3] = 0; + l[1] = l[2] = l[3] = 0; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else { + a[1] = 0; + l[1] = 0; +#endif + } } if (!dry_run) { @@ -932,23 +948,6 @@ void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } #endif -void vp9_fix_contexts(MACROBLOCKD *xd) { - /* Clear entropy contexts for blocks */ - if ((xd->mode_info_context->mbmi.mode != B_PRED - && xd->mode_info_context->mbmi.mode != I8X8_PRED - && xd->mode_info_context->mbmi.mode != SPLITMV) - || xd->mode_info_context->mbmi.txfm_size == TX_16X16 - ) { - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - } else { - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); - xd->above_context->y2 = 1; - xd->left_context->y2 = 1; - } -} - #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS void vp9_fix_contexts_sb(MACROBLOCKD *xd) { vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); From 1306ba765913c90609e8a41ee0890352f62ddb05 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Fri, 14 Dec 2012 12:35:33 -0800 Subject: [PATCH 23/77] Remove vp9_type_aliases.h Prefer the standard fixed-size integer typedefs. 
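For reference, the retired aliases map one-for-one onto the <stdint.h> names, and BOOL becomes a plain int; this is the entire mechanical substitution applied in the hunks below:

    INT8    -> int8_t      UINT8   -> uint8_t
    INT16   -> int16_t     UINT16  -> uint16_t
    INT32   -> int32_t     UINT32  -> uint32_t
    INT64   -> int64_t     BOOL    -> int
    FLOAT32 -> float       FLOAT64 -> double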
Change-Id: Iad75582350669e49a8da3b7facb9c259e9514a5b --- vp9/common/vp9_filter.c | 42 ++++++------ vp9/common/vp9_onyx.h | 1 - vp9/common/vp9_onyxd.h | 1 - vp9/common/vp9_pred_common.h | 1 - vp9/common/vp9_seg_common.h | 1 - vp9/common/vp9_subpelvar.h | 30 ++++----- vp9/common/vp9_type_aliases.h | 120 ---------------------------------- vp9/decoder/vp9_detokenize.c | 7 +- vp9/decoder/vp9_onyxd_int.h | 10 +-- vp9/encoder/vp9_firstpass.c | 17 +++-- vp9/encoder/vp9_onyx_if.c | 18 ++--- vp9/encoder/vp9_tokenize.c | 4 +- vp9/encoder/vp9_tokenize.h | 4 +- vp9/vp9_common.mk | 1 - 14 files changed, 67 insertions(+), 190 deletions(-) delete mode 100644 vp9/common/vp9_type_aliases.h diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 710b3917e..9e1f71757 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -838,13 +838,13 @@ void vp9_eighttap_predict_avg16x16_sharp_c(unsigned char *src_ptr, * * ROUTINE : filter_block2d_bil_first_pass * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_stride : Stride of source block. - * UINT32 height : Block height. - * UINT32 width : Block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. + * INPUTS : uint8_t *src_ptr : Pointer to source block. + * uint32_t src_stride : Stride of source block. + * uint32_t height : Block height. + * uint32_t width : Block width. + * int32_t *vp9_filter : Array of 2 bi-linear filter taps. * - * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. + * OUTPUTS : int32_t *dst_ptr : Pointer to filtered block. * * RETURNS : void * @@ -852,7 +852,7 @@ void vp9_eighttap_predict_avg16x16_sharp_c(unsigned char *src_ptr, * in the horizontal direction to produce the filtered output * block. Used to implement first-pass of 2-D separable filter. * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. + * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. * Two filter taps should sum to VP9_FILTER_WEIGHT. * ****************************************************************************/ @@ -883,13 +883,13 @@ static void filter_block2d_bil_first_pass(unsigned char *src_ptr, * * ROUTINE : filter_block2d_bil_second_pass * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 dst_pitch : Destination block pitch. - * UINT32 height : Block height. - * UINT32 width : Block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. + * INPUTS : int32_t *src_ptr : Pointer to source block. + * uint32_t dst_pitch : Destination block pitch. + * uint32_t height : Block height. + * uint32_t width : Block width. + * int32_t *vp9_filter : Array of 2 bi-linear filter taps. * - * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. + * OUTPUTS : uint16_t *dst_ptr : Pointer to filtered block. * * RETURNS : void * @@ -962,15 +962,15 @@ static void filter_block2d_bil_second_pass_avg(unsigned short *src_ptr, * * ROUTINE : filter_block2d_bil * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pitch : Stride of source block. - * UINT32 dst_pitch : Stride of destination block. - * INT32 *HFilter : Array of 2 horizontal filter taps. - * INT32 *VFilter : Array of 2 vertical filter taps. - * INT32 Width : Block width - * INT32 Height : Block height + * INPUTS : uint8_t *src_ptr : Pointer to source block. + * uint32_t src_pitch : Stride of source block. + * uint32_t dst_pitch : Stride of destination block. + * int32_t *HFilter : Array of 2 horizontal filter taps. + * int32_t *VFilter : Array of 2 vertical filter taps. 
+ * int32_t Width : Block width + * int32_t Height : Block height * - * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. + * OUTPUTS : uint16_t *dst_ptr : Pointer to filtered block. * * RETURNS : void * diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index a6bd95118..0e157f1de 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -20,7 +20,6 @@ extern "C" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8cx.h" #include "vpx_scale/yv12config.h" -#include "vp9/common/vp9_type_aliases.h" #include "vp9/common/vp9_ppflags.h" typedef int *VP9_PTR; diff --git a/vp9/common/vp9_onyxd.h b/vp9/common/vp9_onyxd.h index 063e5a894..807676fdc 100644 --- a/vp9/common/vp9_onyxd.h +++ b/vp9/common/vp9_onyxd.h @@ -18,7 +18,6 @@ extern "C" { #endif -#include "vp9/common/vp9_type_aliases.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" #include "vpx_ports/mem.h" diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h index af6ad7b9d..71b46af64 100644 --- a/vp9/common/vp9_pred_common.h +++ b/vp9/common/vp9_pred_common.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vp9/common/vp9_type_aliases.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_blockd.h" diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h index a9f9b93be..c5c2bb05b 100644 --- a/vp9/common/vp9_seg_common.h +++ b/vp9/common/vp9_seg_common.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vp9/common/vp9_type_aliases.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_blockd.h" diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h index b3c3fcdaf..79fed5571 100644 --- a/vp9/common/vp9_subpelvar.h +++ b/vp9/common/vp9_subpelvar.h @@ -43,14 +43,14 @@ static void variance(const unsigned char *src_ptr, * * ROUTINE : filter_block2d_bil_first_pass * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. + * INPUTS : uint8_t *src_ptr : Pointer to source block. + * uint32_t src_pixels_per_line : Stride of input block. + * uint32_t pixel_step : Offset between filter input samples (see notes). + * uint32_t output_height : Input block height. + * uint32_t output_width : Input block width. + * int32_t *vp9_filter : Array of 2 bi-linear filter taps. * - * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. + * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. * * RETURNS : void * @@ -59,7 +59,7 @@ static void variance(const unsigned char *src_ptr, * filtered output block. Used to implement first-pass * of 2-D separable filter. * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. + * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. * Two filter taps should sum to VP9_FILTER_WEIGHT. * pixel_step defines whether the filter is applied * horizontally (pixel_step=1) or vertically (pixel_step=stride). @@ -95,14 +95,14 @@ static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr, * * ROUTINE : filter_block2d_bil_second_pass * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. 
- * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp9_filter : Array of 2 bi-linear filter taps. + * INPUTS : int32_t *src_ptr : Pointer to source block. + * uint32_t src_pixels_per_line : Stride of input block. + * uint32_t pixel_step : Offset between filter input samples (see notes). + * uint32_t output_height : Input block height. + * uint32_t output_width : Input block width. + * int32_t *vp9_filter : Array of 2 bi-linear filter taps. * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. + * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. * * RETURNS : void * diff --git a/vp9/common/vp9_type_aliases.h b/vp9/common/vp9_type_aliases.h deleted file mode 100644 index 110e2d07d..000000000 --- a/vp9/common/vp9_type_aliases.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : vp9_type_aliases.h -* -* Description : Standard type aliases -* -****************************************************************************/ -#ifndef VP9_COMMON_VP9_TYPE_ALIASES_H_ -#define VP9_COMMON_VP9_TYPE_ALIASES_H_ - -/**************************************************************************** -* Macros -****************************************************************************/ -#define EXPORT -#define IMPORT extern /* Used to declare imported data & routines */ -#define PRIVATE static /* Used to declare & define module-local data */ -#define LOCAL static /* Used to define all persistent routine-local data */ -#define STD_IN_PATH 0 /* Standard input path */ -#define STD_OUT_PATH 1 /* Standard output path */ -#define STD_ERR_PATH 2 /* Standard error path */ -#define STD_IN_FILE stdin /* Standard input file pointer */ -#define STD_OUT_FILE stdout /* Standard output file pointer */ -#define STD_ERR_FILE stderr /* Standard error file pointer */ -#define max_int 0x7FFFFFFF - -#define __export -#define _export - -#define CCONV - -#ifndef NULL -#ifdef __cplusplus -#define NULL 0 -#else -#define NULL ((void *)0) -#endif -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -/**************************************************************************** -* Typedefs -****************************************************************************/ -#ifndef TYPE_INT8 -#define TYPE_INT8 -typedef signed char INT8; -#endif - -#ifndef TYPE_INT16 -/*#define TYPE_INT16*/ -typedef signed short INT16; -#endif - -#ifndef TYPE_INT32 -/*#define TYPE_INT32*/ -typedef signed int INT32; -#endif - -#ifndef TYPE_UINT8 -/*#define TYPE_UINT8*/ -typedef unsigned char UINT8; -#endif - -#ifndef TYPE_UINT32 -/*#define TYPE_UINT32*/ -typedef unsigned int UINT32; -#endif - -#ifndef TYPE_UINT16 -/*#define TYPE_UINT16*/ -typedef unsigned short UINT16; -#endif - -#ifndef TYPE_BOOL -/*#define TYPE_BOOL*/ -typedef int BOOL; -#endif - -typedef unsigned char BOOLEAN; - -#ifdef _MSC_VER -typedef __int64 INT64; -#ifndef INT64_MAX -#define INT64_MAX LLONG_MAX -#endif -#else - -#ifndef 
TYPE_INT64 -#ifdef _TMS320C6X -/* for now we only have 40bits */ -typedef long INT64; -#else -typedef long long INT64; -#endif -#endif - -#endif - -/* Floating point */ -typedef double FLOAT64; -typedef float FLOAT32; - -#endif diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 1e3608fb2..5e5861aad 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -9,7 +9,6 @@ */ -#include "vp9/common/vp9_type_aliases.h" #include "vp9/common/vp9_blockd.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vpx_mem/vpx_mem.h" @@ -91,7 +90,7 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = (INT16) get_signed(br, val); \ + qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \ INCREMENT_COUNT(token); \ c++; \ continue; \ @@ -100,7 +99,7 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { #define ADJUST_COEF(prob, bits_count) \ do { \ if (vp9_read(br, prob)) \ - val += (UINT16)(1 << bits_count);\ + val += (uint16_t)(1 << bits_count);\ } while (0); static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, @@ -108,7 +107,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, PLANE_TYPE type, TX_TYPE tx_type, - int seg_eob, INT16 *qcoeff_ptr, + int seg_eob, int16_t *qcoeff_ptr, const int *const scan, TX_SIZE txfm_size, const int *coef_bands) { FRAME_CONTEXT *const fc = &dx->common.fc; diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 471442944..73229ed83 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -35,22 +35,22 @@ typedef struct { typedef struct { int const *scan; int const *scan_8x8; - UINT8 const *ptr_block2leftabove; + uint8_t const *ptr_block2leftabove; vp9_tree_index const *vp9_coef_tree_ptr; unsigned char *norm_ptr; - UINT8 *ptr_coef_bands_x; - UINT8 *ptr_coef_bands_x_8x8; + uint8_t *ptr_coef_bands_x; + uint8_t *ptr_coef_bands_x_8x8; ENTROPY_CONTEXT_PLANES *A; ENTROPY_CONTEXT_PLANES *L; - INT16 *qcoeff_start_ptr; + int16_t *qcoeff_start_ptr; vp9_prob const *coef_probs_4x4[BLOCK_TYPES_4X4]; vp9_prob const *coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_prob const *coef_probs_16X16[BLOCK_TYPES_16X16]; - UINT8 eob[25]; + uint8_t eob[25]; } DETOK; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 13e189960..37f270e15 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -1232,7 +1232,7 @@ static int detect_transition_to_still( int still_interval, double loop_decay_rate, double last_decay_rate) { - BOOL trans_to_still = FALSE; + int trans_to_still = FALSE; // Break clause to detect very still sections after motion // For example a static image after a fade or other transition @@ -1270,10 +1270,10 @@ static int detect_transition_to_still( // This function detects a flash through the high relative pcnt_second_ref // score in the frame following a flash frame. The offset passed in should // reflect this -static BOOL detect_flash(VP9_COMP *cpi, int offset) { +static int detect_flash(VP9_COMP *cpi, int offset) { FIRSTPASS_STATS next_frame; - BOOL flash_detected = FALSE; + int flash_detected = FALSE; // Read the frame data. 
// The return is FALSE (no flash detected) if not a valid frame @@ -1385,7 +1385,7 @@ static int calc_arf_boost( double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; int arf_boost; - BOOL flash_detected = FALSE; + int flash_detected = FALSE; // Search forward from the proposed arf/next gf position for (i = 0; i < f_frames; i++) { @@ -1542,7 +1542,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int f_boost = 0; int b_boost = 0; - BOOL flash_detected; + int flash_detected; cpi->twopass.gf_group_bits = 0; @@ -2101,8 +2101,11 @@ void vp9_second_pass(VP9_COMP *cpi) { } -static BOOL test_candidate_kf(VP9_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame) { - BOOL is_viable_kf = FALSE; +static int test_candidate_kf(VP9_COMP *cpi, + FIRSTPASS_STATS *last_frame, + FIRSTPASS_STATS *this_frame, + FIRSTPASS_STATS *next_frame) { + int is_viable_kf = FALSE; // Does the frame satisfy the primary criteria of a key frame // If so, then examine how well it predicts subsequent frames diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 07a3a0444..1c7871050 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -118,7 +118,7 @@ unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, extern unsigned __int64 Sectionbits[500]; #endif #ifdef MODE_STATS -extern INT64 Sectionbits[500]; +extern int64_t Sectionbits[500]; extern unsigned int y_modes[VP9_YMODES]; extern unsigned int i8x8_modes[VP9_I8X8_MODES]; extern unsigned int uv_modes[VP9_UV_MODES]; @@ -2698,10 +2698,10 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { // Function to test for conditions that indicate we should loop // back and recode a frame. 
-static BOOL recode_loop_test(VP9_COMP *cpi, - int high_limit, int low_limit, - int q, int maxq, int minq) { - BOOL force_recode = FALSE; +static int recode_loop_test(VP9_COMP *cpi, + int high_limit, int low_limit, + int q, int maxq, int minq) { + int force_recode = FALSE; VP9_COMMON *cm = &cpi->common; // Is frame recode allowed at all @@ -2941,7 +2941,7 @@ static void encode_frame_to_data_rate int mcomp_filters = sizeof(mcomp_filters_to_search) / sizeof(*mcomp_filters_to_search); int mcomp_filter_index = 0; - INT64 mcomp_filter_cost[4]; + int64_t mcomp_filter_cost[4]; // Clear down mmx registers to allow floating point in what follows vp9_clear_system_state(); @@ -3526,9 +3526,9 @@ static void encode_frame_to_data_rate if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) { if (mcomp_filter_index < mcomp_filters) { - INT64 err = vp9_calc_ss_err(cpi->Source, + int64_t err = vp9_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); - INT64 rate = cpi->projected_frame_size << 8; + int64_t rate = cpi->projected_frame_size << 8; mcomp_filter_cost[mcomp_filter_index] = (RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err)); mcomp_filter_index++; @@ -3538,7 +3538,7 @@ static void encode_frame_to_data_rate Loop = TRUE; } else { int f; - INT64 best_cost = mcomp_filter_cost[0]; + int64_t best_cost = mcomp_filter_cost[0]; int mcomp_best_filter = mcomp_filters_to_search[0]; for (f = 1; f < mcomp_filters; f++) { if (mcomp_filter_cost[f] < best_cost) { diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 2d6138114..fb9e3ed72 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -582,10 +582,10 @@ static void print_counter(FILE *f, vp9_coeff_accum *context_counters, t = 0; do { - const INT64 x = context_counters[type][band][pt][t]; + const int64_t x = context_counters[type][band][pt][t]; const int y = (int) x; - assert(x == (INT64) y); /* no overflow handling yet */ + assert(x == (int64_t) y); /* no overflow handling yet */ fprintf(f, "%s %d", Comma(t), y); } while (++t < MAX_ENTROPY_TOKENS); fprintf(f, "}"); diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 17ff9b32c..2f907801b 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -29,8 +29,8 @@ typedef struct { unsigned char skip_eob_node; } TOKENEXTRA; -typedef INT64 vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS] - [MAX_ENTROPY_TOKENS]; +typedef int64_t vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS]; extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block); extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 5d2fe6ff9..ea1cc0970 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -9,7 +9,6 @@ ## VP9_COMMON_SRCS-yes += vp9_common.mk -VP9_COMMON_SRCS-yes += common/vp9_type_aliases.h VP9_COMMON_SRCS-yes += common/vp9_pragmas.h VP9_COMMON_SRCS-yes += common/vp9_ppflags.h VP9_COMMON_SRCS-yes += common/vp9_onyx.h From ebb5f2f7bd27f1d0bb2b8f49ed717c85b9d49c95 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 18 Dec 2012 10:38:12 -0800 Subject: [PATCH 24/77] Remove redundant "Prob" type (it's a duplicate of vp9_prob). 
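vp9_prob is the 8-bit probability type the entropy coder already uses everywhere else, so the constant tables only change their spelling, e.g.:

    /* before */ static const Prob     Pcat1[] = { 159 };
    /* after  */ static const vp9_prob Pcat1[] = { 159 };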
Change-Id: I9548891d7b8ff672a31579bcdce74e4cea529883 --- vp9/common/vp9_entropy.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 1e5848843..ec1fbf61b 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -24,8 +24,6 @@ typedef const uchar cuchar; typedef const uint cuint; -typedef vp9_prob Prob; - #include "vp9/common/vp9_coefupdateprobs.h" const int vp9_i8x8_block[4] = {0, 2, 8, 10}; @@ -490,12 +488,12 @@ struct vp9_token_struct vp9_coef_encodings[MAX_ENTROPY_TOKENS]; /* Trees for extra bits. Probabilities are constant and do not depend on previously encoded bits */ -static const Prob Pcat1[] = { 159}; -static const Prob Pcat2[] = { 165, 145}; -static const Prob Pcat3[] = { 173, 148, 140}; -static const Prob Pcat4[] = { 176, 155, 140, 135}; -static const Prob Pcat5[] = { 180, 157, 141, 134, 130}; -static const Prob Pcat6[] = { +static const vp9_prob Pcat1[] = { 159}; +static const vp9_prob Pcat2[] = { 165, 145}; +static const vp9_prob Pcat3[] = { 173, 148, 140}; +static const vp9_prob Pcat4[] = { 176, 155, 140, 135}; +static const vp9_prob Pcat5[] = { 180, 157, 141, 134, 130}; +static const vp9_prob Pcat6[] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; From 8986eb5c26bb19e9e797819a72102e1061b68d74 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 18 Dec 2012 10:49:10 -0800 Subject: [PATCH 25/77] Give 4x4 scan and coef_band tables a _4x4 suffix. This matches the names of tables for all other transform sizes. Change-Id: Ia7681b7f8d34c97c27b0eb0e34d490cd0f8d02c6 --- vp9/common/vp9_entropy.c | 8 ++++---- vp9/common/vp9_entropy.h | 8 ++++---- vp9/decoder/vp9_decodframe.c | 2 +- vp9/decoder/vp9_detokenize.c | 18 ++++++++++-------- vp9/encoder/vp9_encodemb.c | 16 ++++++++-------- vp9/encoder/vp9_quantize.c | 12 ++++++------ vp9/encoder/vp9_rdopt.c | 16 ++++++++-------- vp9/encoder/vp9_tokenize.c | 14 +++++++------- 8 files changed, 48 insertions(+), 46 deletions(-) diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index ec1fbf61b..27e90b66c 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -47,7 +47,7 @@ DECLARE_ALIGNED(16, const unsigned char, vp9_norm[256]) = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -DECLARE_ALIGNED(16, const int, vp9_coef_bands[16]) = { +DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7 }; @@ -55,20 +55,20 @@ DECLARE_ALIGNED(16, cuchar, vp9_prev_token_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0 }; -DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d[16]) = { +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15, }; -DECLARE_ALIGNED(16, const int, vp9_col_scan[16]) = { +DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]) = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; -DECLARE_ALIGNED(16, const int, vp9_row_scan[16]) = { +DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 4262b3030..f55ab8c93 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -74,7 +74,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ position within the 4x4 DCT. 
*/ #define COEF_BANDS 8 -extern DECLARE_ALIGNED(16, const int, vp9_coef_bands[16]); +extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS @@ -114,10 +114,10 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp9_prev_token_class[MAX_ENTROPY struct VP9Common; void vp9_default_coef_probs(struct VP9Common *); -extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d[16]); +extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]); -extern DECLARE_ALIGNED(16, const int, vp9_col_scan[16]); -extern DECLARE_ALIGNED(16, const int, vp9_row_scan[16]); +extern DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]); +extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index d72d08698..8d9f77b77 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -88,7 +88,7 @@ void vp9_init_de_quantizer(VP9D_COMP *pbi) { /* all the ac values =; */ for (i = 1; i < 16; i++) { - int rc = vp9_default_zig_zag1d[i]; + int rc = vp9_default_zig_zag1d_4x4[i]; pc->Y1dequant[Q][rc] = (short)vp9_ac_yquant(Q); pc->Y2dequant[Q][rc] = (short)vp9_ac2quant(Q, pc->y2ac_delta_q); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 5e5861aad..4e158976f 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -368,7 +368,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_Y2, DCT_DCT, get_eob(xd, segment_id, 4), xd->block[24].qcoeff, - vp9_default_zig_zag1d, TX_8X8, vp9_coef_bands); + vp9_default_zig_zag1d_4x4, TX_8X8, + vp9_coef_bands_4x4); eobtotal += c - 4; type = PLANE_TYPE_Y_NO_DC; } else { @@ -412,7 +413,8 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->block[i].qcoeff, - vp9_default_zig_zag1d, TX_4X4, vp9_coef_bands); + vp9_default_zig_zag1d_4x4, TX_4X4, + vp9_coef_bands_4x4); eobtotal += c; } } else { @@ -453,7 +455,7 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, int c; c = decode_coefs(dx, xd, bc, a, l, type, tx_type, seg_eob, - xd->block[i].qcoeff, scan, TX_4X4, vp9_coef_bands); + xd->block[i].qcoeff, scan, TX_4X4, vp9_coef_bands_4x4); eobs[i] = c; return c; @@ -468,13 +470,13 @@ static int decode_coefs_4x4_y(VP9D_COMP *dx, MACROBLOCKD *xd, switch (tx_type) { case ADST_DCT: - scan = vp9_row_scan; + scan = vp9_row_scan_4x4; break; case DCT_ADST: - scan = vp9_col_scan; + scan = vp9_col_scan_4x4; break; default: - scan = vp9_default_zig_zag1d; + scan = vp9_default_zig_zag1d_4x4; break; } @@ -499,7 +501,7 @@ static int decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, // chroma blocks for (i = 16; i < 24; i++) { eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob, - DCT_DCT, vp9_default_zig_zag1d); + DCT_DCT, vp9_default_zig_zag1d_4x4); } return eobtotal; @@ -526,7 +528,7 @@ static int vp9_decode_mb_tokens_4x4(VP9D_COMP* const dx, // 2nd order DC block if (has_2nd_order) { eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24, seg_eob, - DCT_DCT, vp9_default_zig_zag1d) - 16; + DCT_DCT, vp9_default_zig_zag1d_4x4) - 16; type = PLANE_TYPE_Y_NO_DC; } else { 
xd->above_context->y2 = 0; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 497509995..6b175085f 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -384,8 +384,8 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, switch (tx_size) { default: case TX_4X4: - scan = vp9_default_zig_zag1d; - bands = vp9_coef_bands; + scan = vp9_default_zig_zag1d_4x4; + bands = vp9_coef_bands_4x4; default_eob = 16; // TODO: this isn't called (for intra4x4 modes), but will be left in // since it could be used later @@ -394,19 +394,19 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, if (tx_type != DCT_DCT) { switch (tx_type) { case ADST_DCT: - scan = vp9_row_scan; + scan = vp9_row_scan_4x4; break; case DCT_ADST: - scan = vp9_col_scan; + scan = vp9_col_scan_4x4; break; default: - scan = vp9_default_zig_zag1d; + scan = vp9_default_zig_zag1d_4x4; break; } } else { - scan = vp9_default_zig_zag1d; + scan = vp9_default_zig_zag1d_4x4; } } break; @@ -601,7 +601,7 @@ static void check_reset_2nd_coeffs(MACROBLOCKD *xd, return; for (i = 0; i < bd->eob; i++) { - int coef = bd->dqcoeff[vp9_default_zig_zag1d[i]]; + int coef = bd->dqcoeff[vp9_default_zig_zag1d_4x4[i]]; sum += (coef >= 0) ? coef : -coef; if (sum >= SUM_2ND_COEFF_THRESH) return; @@ -609,7 +609,7 @@ static void check_reset_2nd_coeffs(MACROBLOCKD *xd, if (sum < SUM_2ND_COEFF_THRESH) { for (i = 0; i < bd->eob; i++) { - int rc = vp9_default_zig_zag1d[i]; + int rc = vp9_default_zig_zag1d_4x4[i]; bd->qcoeff[rc] = 0; bd->dqcoeff[rc] = 0; } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index fcc7d2948..1f5f8f72d 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -40,15 +40,15 @@ void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) { switch (tx_type) { case ADST_DCT : - pt_scan = vp9_row_scan; + pt_scan = vp9_row_scan_4x4; break; case DCT_ADST : - pt_scan = vp9_col_scan; + pt_scan = vp9_col_scan_4x4; break; default : - pt_scan = vp9_default_zig_zag1d; + pt_scan = vp9_default_zig_zag1d_4x4; break; } @@ -106,7 +106,7 @@ void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) { eob = -1; for (i = 0; i < b->eob_max_offset; i++) { - rc = vp9_default_zig_zag1d[i]; + rc = vp9_default_zig_zag1d_4x4[i]; z = coeff_ptr[rc]; zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; @@ -192,7 +192,7 @@ void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) { eob = -1; for (i = 0; i < b->eob_max_offset_8x8; i++) { - rc = vp9_default_zig_zag1d[i]; + rc = vp9_default_zig_zag1d_4x4[i]; z = coeff_ptr[rc]; zbin_boost_ptr = &b->zrun_zbin_boost[zbin_zrun_index]; @@ -606,7 +606,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) { // all the 4x4 ac values =; for (i = 1; i < 16; i++) { - int rc = vp9_default_zig_zag1d[i]; + int rc = vp9_default_zig_zag1d_4x4[i]; quant_val = vp9_ac_yquant(Q); invert_quant(cpi->Y1quant[Q] + rc, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9df59861e..5b5467e61 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -526,15 +526,15 @@ static int cost_coeffs_2x2(MACROBLOCK *mb, assert(eob <= 4); for (; c < eob; c++) { - int v = qcoeff_ptr[vp9_default_zig_zag1d[c]]; + int v = qcoeff_ptr[vp9_default_zig_zag1d_4x4[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; - cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]][pt][t]; + cost += mb->token_costs[TX_8X8][type][vp9_coef_bands_4x4[c]][pt][t]; cost += vp9_dct_value_cost_ptr[v]; pt = vp9_prev_token_class[t]; } if (c < 4) - cost += 
mb->token_costs[TX_8X8][type][vp9_coef_bands[c]] + cost += mb->token_costs[TX_8X8][type][vp9_coef_bands_4x4[c]] [pt] [DCT_EOB_TOKEN]; // is eob first coefficient; pt = (c > !type); @@ -555,8 +555,8 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi; TX_TYPE tx_type = DCT_DCT; int segment_id = mbmi->segment_id; - scan = vp9_default_zig_zag1d; - band = vp9_coef_bands; + scan = vp9_default_zig_zag1d_4x4; + band = vp9_coef_bands_4x4; default_eob = 16; switch (tx_size) { @@ -566,15 +566,15 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, if (tx_type != DCT_DCT) { switch (tx_type) { case ADST_DCT: - scan = vp9_row_scan; + scan = vp9_row_scan_4x4; break; case DCT_ADST: - scan = vp9_col_scan; + scan = vp9_col_scan_4x4; break; default: - scan = vp9_default_zig_zag1d; + scan = vp9_default_zig_zag1d_4x4; break; } } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index fb9e3ed72..796d108cd 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -134,15 +134,15 @@ static void tokenize_b(VP9_COMP *cpi, default: case TX_4X4: seg_eob = 16; - bands = vp9_coef_bands; - scan = vp9_default_zig_zag1d; + bands = vp9_coef_bands_4x4; + scan = vp9_default_zig_zag1d_4x4; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_4x4; probs = cpi->common.fc.hybrid_coef_probs_4x4; if (tx_type == ADST_DCT) { - scan = vp9_row_scan; + scan = vp9_row_scan_4x4; } else if (tx_type == DCT_ADST) { - scan = vp9_col_scan; + scan = vp9_col_scan_4x4; } } else { counts = cpi->coef_counts_4x4; @@ -152,8 +152,8 @@ static void tokenize_b(VP9_COMP *cpi, case TX_8X8: if (type == PLANE_TYPE_Y2) { seg_eob = 4; - bands = vp9_coef_bands; - scan = vp9_default_zig_zag1d; + bands = vp9_coef_bands_4x4; + scan = vp9_default_zig_zag1d_4x4; } else { #if CONFIG_CNVCONTEXT a_ec = (a[0] + a[1]) != 0; @@ -729,7 +729,7 @@ static __inline void stuff_b(VP9_COMP *cpi, switch (tx_size) { default: case TX_4X4: - bands = vp9_coef_bands; + bands = vp9_coef_bands_4x4; if (tx_type != DCT_DCT) { counts = cpi->hybrid_coef_counts_4x4; probs = cpi->common.fc.hybrid_coef_probs_4x4; From e8d610dda06a972236dbf6ddeda5f4d16dbddb3b Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Tue, 18 Dec 2012 12:09:46 -0800 Subject: [PATCH 26/77] Fix a warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed the warning: the size of array ‘intermediate_buffer’ can’t be evaluated [-Wvla]. 
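In C (unlike C++), a const-qualified variable is not an integer constant expression, so an array dimensioned from one is formally a variable-length array. A reduced sketch of the pattern being removed, using the names from vp9_filter.c:

    static const unsigned int filter_max_width = 16;
    const int kInterp_Extend = 4;
    const unsigned int max_intermediate_height =
        (kInterp_Extend - 1) + 16 /* filter_max_height */ + kInterp_Extend;

    /* before: the size is not a constant expression, hence -Wvla */
    unsigned char intermediate_buffer[max_intermediate_height *
                                      filter_max_width];

    /* after: a plain constant, 3 + 16 + 4 = 23 rows of 16 pixels */
    unsigned char intermediate_buffer[23 * 16];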
Change-Id: Ibcffd6969bd71cee0c10f7cf18960e58cd0bd915 --- vp9/common/vp9_filter.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 9e1f71757..54d45f5a3 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -470,9 +470,6 @@ static const unsigned int filter_size_to_wh[][2] = { {16,16}, }; -static const unsigned int filter_max_height = 16; -static const unsigned int filter_max_width = 16; - static void filter_block2d_8_c(const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter, @@ -490,14 +487,15 @@ static void filter_block2d_8_c(const unsigned char *src_ptr, const int kInterp_Extend = 4; const unsigned int intermediate_height = (kInterp_Extend - 1) + output_height + kInterp_Extend; - const unsigned int max_intermediate_height = - (kInterp_Extend - 1) + filter_max_height + kInterp_Extend; -#ifdef _MSC_VER - // MSVC does not support C99 style declaration + + /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, + * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height + * + kInterp_Extend + * = 3 + 16 + 4 + * = 23 + * and filter_max_width = 16 + */ unsigned char intermediate_buffer[23 * 16]; -#else - unsigned char intermediate_buffer[max_intermediate_height * filter_max_width]; -#endif const int intermediate_next_stride = 1 - intermediate_height * output_width; // Horizontal pass (src -> transposed intermediate). From 779c5f28a8be1d4d7aa689076aa466e46273920a Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Tue, 18 Dec 2012 12:58:18 -0800 Subject: [PATCH 27/77] Fix uninitialized warning Fixed uninitialized warning for txfm_size. Change-Id: I42b7e802c3e84825d49f34e632361502641b7cbf --- vp9/encoder/vp9_rdopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9df59861e..be81e56f4 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1381,7 +1381,7 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, int *skippable, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; - TX_SIZE txfm_size; + TX_SIZE UNINITIALIZED_IS_SAFE(txfm_size); MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); #if CONFIG_COMP_INTRA_PRED MB_PREDICTION_MODE mode2; From de269c8a621ce5bb3a652559093617feb8061b7f Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Tue, 18 Dec 2012 11:25:24 -0800 Subject: [PATCH 28/77] correct logic in cnvcontext experiment for tx32x32 Change-Id: I004ded11983b7fda85793912ebc5c6f266dc5eb5 --- vp9/decoder/vp9_detokenize.c | 50 ++++++++++++++------- vp9/encoder/vp9_tokenize.c | 85 +++++++++++++++++++++++++----------- 2 files changed, 95 insertions(+), 40 deletions(-) diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 5e5861aad..117b8649f 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -252,45 +252,65 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; + ENTROPY_CONTEXT* const A1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]); + ENTROPY_CONTEXT* const L1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]); unsigned short* const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; int c, i, eobtotal = 0, seg_eob; // Luma block - eobs[0] = c = decode_coefs(pbi, xd, bc, A, L, PLANE_TYPE_Y_WITH_DC, +#if 
CONFIG_CNVCONTEXT + ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3] + + A1[0] + A1[1] + A1[2] + A1[3]) != 0; + ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3] + + L1[0] + L1[1] + L1[2] + L1[3]) != 0; +#else + ENTROPY_CONTEXT above_ec = A[0]; + ENTROPY_CONTEXT left_ec = L[0]; +#endif + eobs[0] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, + PLANE_TYPE_Y_WITH_DC, DCT_DCT, get_eob(xd, segment_id, 1024), xd->sb_coeff_data.qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32, vp9_coef_bands_32x32); - A[1] = A[2] = A[3] = A[0]; - L[1] = L[2] = L[3] = L[0]; + A[1] = A[2] = A[3] = A[0] = above_ec; + L[1] = L[2] = L[3] = L[0] = left_ec; + A1[1] = A1[2] = A1[3] = A1[0] = above_ec; + L1[1] = L1[2] = L1[3] = L1[0] = left_ec; + eobtotal += c; // 16x16 chroma blocks seg_eob = get_eob(xd, segment_id, 256); + for (i = 16; i < 24; i += 4) { ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_16X16][i]; ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_16X16][i]; + ENTROPY_CONTEXT* const a1 = A1 + vp9_block2above[TX_16X16][i]; + ENTROPY_CONTEXT* const l1 = L1 + vp9_block2left[TX_16X16][i]; +#if CONFIG_CNVCONTEXT + above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; +#else + above_ec = a[0]; + left_ec = l[0]; +#endif - eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV, + eobs[i] = c = decode_coefs(pbi, xd, bc, + &above_ec, &left_ec, + PLANE_TYPE_UV, DCT_DCT, seg_eob, xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, vp9_default_zig_zag1d_16x16, TX_16X16, vp9_coef_bands_16x16); - a[1] = a[0]; - l[1] = l[0]; + + a1[1] = a1[0] = a[1] = a[0] = above_ec; + l1[1] = l1[0] = l[1] = l[0] = left_ec; eobtotal += c; } - // no Y2 block - vpx_memset(&A[8], 0, sizeof(A[8])); - vpx_memset(&L[8], 0, sizeof(L[8])); - - vpx_memcpy(xd->above_context + 1, xd->above_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(xd->left_context + 1, xd->left_context, - sizeof(ENTROPY_CONTEXT_PLANES)); - + A[8] = L[8] = A1[8] = L1[8] = 0; return eobtotal; } #endif diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index fb9e3ed72..491914a5b 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -130,6 +130,14 @@ static void tokenize_b(VP9_COMP *cpi, vp9_block2left[tx_size][ib]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + + vp9_block2above[tx_size][ib]; + ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + + vp9_block2left[tx_size][ib]; +#endif + + switch (tx_size) { default: case TX_4X4: @@ -176,6 +184,11 @@ static void tokenize_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + } else { + a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; +#endif } #endif seg_eob = 256; @@ -197,6 +210,14 @@ static void tokenize_b(VP9_COMP *cpi, break; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS case TX_32X32: +#if CONFIG_CNVCONTEXT + a_ec = a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]; + l_ec = l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]; + a_ec = a_ec != 0; + l_ec = l_ec != 0; +#endif seg_eob = 1024; bands = vp9_coef_bands_32x32; scan = vp9_default_zig_zag1d_32x32; @@ -253,10 +274,17 @@ static void tokenize_b(VP9_COMP *cpi, l[1] = l[2] = l[3] = l_ec; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS } else { - a[1] = a_ec; - l[1] = l_ec; + a1[0] = 
a1[1] = a[1] = a_ec; + l1[0] = l1[1] = l[1] = l_ec; #endif } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (tx_size == TX_32X32) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + a1[0] = a1[1] = a1[2] = a1[3] = a_ec; + l1[0] = l1[1] = l1[2] = l1[3] = l_ec; +#endif } } @@ -381,18 +409,12 @@ void vp9_tokenize_sb(VP9_COMP *cpi, tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); - A[0][1] = A[0][2] = A[0][3] = A[0][0]; - L[0][1] = L[0][2] = L[0][3] = L[0][0]; for (b = 16; b < 24; b += 4) { tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); } - vpx_memset(&A[0][8], 0, sizeof(A[0][8])); - vpx_memset(&L[0][8], 0, sizeof(L[0][8])); - vpx_memcpy(A[1], A[0], sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(L[1], L[0], sizeof(ENTROPY_CONTEXT_PLANES)); - + A[0][8] = L[0][8] = A[1][8] = L[1][8] = 0; if (dry_run) *t = t_backup; } @@ -425,6 +447,7 @@ void vp9_tokenize_mb(VP9_COMP *cpi, switch (tx_size) { case TX_16X16: + xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable_16x16(xd); break; case TX_8X8: @@ -499,7 +522,6 @@ void vp9_tokenize_mb(VP9_COMP *cpi, *t = t_backup; } - #ifdef ENTROPY_STATS void init_context_counters(void) { FILE *f = fopen("context.bin", "rb"); @@ -719,12 +741,17 @@ static __inline void stuff_b(VP9_COMP *cpi, TOKENEXTRA *t = *tp; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type(xd, b) : DCT_DCT; - ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + + vp9_block2above[tx_size][ib]; + ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + + vp9_block2left[tx_size][ib]; +#endif switch (tx_size) { default: @@ -759,6 +786,11 @@ static __inline void stuff_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 + } else { + a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; +#endif } #endif bands = vp9_coef_bands_16x16; @@ -772,6 +804,14 @@ static __inline void stuff_b(VP9_COMP *cpi, break; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS case TX_32X32: +#if CONFIG_CNVCONTEXT + a_ec = a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]; + l_ec = l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]; + a_ec = a_ec != 0; + l_ec = l_ec != 0; +#endif bands = vp9_coef_bands_32x32; counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; @@ -797,10 +837,17 @@ static __inline void stuff_b(VP9_COMP *cpi, l[1] = l[2] = l[3] = 0; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS } else { - a[1] = 0; - l[1] = 0; + a1[0] = a1[1] = a[1] = a_ec; + l1[0] = l1[1] = l[1] = l_ec; #endif } +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (tx_size == TX_32X32) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + a1[0] = a1[1] = a1[2] = a1[3] = a_ec; + l1[0] = l1[1] = l1[2] = l1[3] = l_ec; +#endif } if (!dry_run) { @@ -917,24 +964,12 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { - ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0), - (ENTROPY_CONTEXT *) (xd->above_context + 
1), }; ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0), (ENTROPY_CONTEXT *) (xd->left_context + 1), }; int b; stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); - A[0][1] = A[0][2] = A[0][3] = A[0][0]; - L[0][1] = L[0][2] = L[0][3] = L[0][0]; for (b = 16; b < 24; b += 4) { stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - A[0][vp9_block2above[TX_16X16][b] + 1] = A[0][vp9_block2above[TX_16X16][b]]; - L[0][vp9_block2left[TX_16X16][b] + 1] = L[0][vp9_block2left[TX_16X16][b]]; } - vpx_memset(&A[0][8], 0, sizeof(A[0][8])); - vpx_memset(&L[0][8], 0, sizeof(L[0][8])); - vpx_memcpy(A[1], A[0], sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(L[1], L[0], sizeof(ENTROPY_CONTEXT_PLANES)); } void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { From 28765690fe24d919768ca0d0ddfd7958201fcb8a Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Tue, 18 Dec 2012 14:36:20 -0800 Subject: [PATCH 29/77] Changed MAX_PSNR to 100 MAX_PSNR is used to assign a "psnr" number when the mse is close to zero. The direct assignment is used to prevent a divide by zero in the computation. Changing it from 60 to 100 makes it consistent with what is being done in VP9. Change-Id: I4854ffc4961e59d372ec8005a0d52ca46e3c4c1a --- vp8/encoder/psnr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c index 5bb49ad26..b3a3d9552 100644 --- a/vp8/encoder/psnr.c +++ b/vp8/encoder/psnr.c @@ -13,7 +13,7 @@ #include "math.h" #include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */ -#define MAX_PSNR 60 +#define MAX_PSNR 100 double vp8_mse2psnr(double Samples, double Peak, double Mse) { From 4cca47b5385adb8ace28b10db7a20f337b15bbeb Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 18 Dec 2012 15:31:19 -0800 Subject: [PATCH 30/77] Use standard integer types for pixel values and coefficients. For coefficients, use int16_t (instead of short); for pixel values in 16-bit intermediates, use uint16_t (instead of unsigned short); for all others, use uint8_t (instead of unsigned char).
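Applied to a typical declaration block, the conversion is a one-for-one rewrite; e.g. from the blockd/macroblockd hunks below:

    /* before */                  /* after */
    short *qcoeff;                int16_t *qcoeff;
    unsigned char *predictor;     uint8_t *predictor;
    unsigned short eobs[25];      uint16_t eobs[25];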
Change-Id: I3619cd9abf106c3742eccc2e2f5e89a62774f7da --- vp9/common/vp9_alloccommon.h | 2 +- vp9/common/vp9_blockd.c | 4 +- vp9/common/vp9_blockd.h | 48 +-- vp9/common/vp9_coefupdateprobs.h | 4 + vp9/common/vp9_common.h | 23 +- vp9/common/vp9_common_types.h | 18 - vp9/common/vp9_default_coef_probs.h | 2 +- vp9/common/vp9_entropy.c | 12 +- vp9/common/vp9_entropy.h | 7 +- vp9/common/vp9_entropymode.h | 3 +- vp9/common/vp9_entropymv.h | 3 +- vp9/common/vp9_extend.c | 12 +- vp9/common/vp9_extend.h | 7 +- vp9/common/vp9_filter.c | 632 ++++++++++++++-------------- vp9/common/vp9_filter.h | 11 +- vp9/common/vp9_findnearmv.c | 48 +-- vp9/common/vp9_findnearmv.h | 6 +- vp9/common/vp9_header.h | 4 +- vp9/common/vp9_idctllm.c | 132 +++--- vp9/common/vp9_invtrans.c | 6 +- vp9/common/vp9_invtrans.h | 11 +- vp9/common/vp9_loopfilter.c | 6 +- vp9/common/vp9_loopfilter.h | 12 +- vp9/common/vp9_loopfilter_filters.c | 234 +++++----- vp9/common/vp9_mbpitch.c | 25 +- vp9/common/vp9_modecont.h | 4 +- vp9/common/vp9_mv.h | 8 +- vp9/common/vp9_mvref_common.h | 18 +- vp9/common/vp9_onyx.h | 3 +- vp9/common/vp9_onyxc_int.h | 9 +- vp9/common/vp9_postproc.c | 50 +-- vp9/common/vp9_postproc.h | 3 +- vp9/common/vp9_ppflags.h | 4 +- vp9/common/vp9_pragmas.h | 6 +- vp9/common/vp9_pred_common.h | 2 +- vp9/common/vp9_quant_common.h | 6 +- vp9/common/vp9_recon.c | 55 +-- vp9/common/vp9_reconinter.c | 135 +++--- vp9/common/vp9_reconinter.h | 33 +- vp9/common/vp9_reconintra.c | 76 ++-- vp9/common/vp9_reconintra.h | 28 +- vp9/common/vp9_reconintra4x4.c | 73 ++-- vp9/common/vp9_reconintra4x4.h | 2 +- vp9/common/vp9_rtcd_defs.sh | 328 ++++++++------- vp9/common/vp9_sadmxn.h | 18 +- vp9/common/vp9_seg_common.h | 2 +- vp9/common/vp9_setupintrarecon.c | 6 +- vp9/common/vp9_setupintrarecon.h | 5 + vp9/common/vp9_subpelvar.h | 21 +- vp9/common/vp9_subpixel.h | 7 +- vp9/common/vp9_swapyv12buffer.c | 2 +- vp9/common/vp9_swapyv12buffer.h | 2 +- vp9/common/vp9_systemdependent.h | 4 +- vp9/common/vp9_textblit.h | 2 +- vp9/common/vp9_treecoder.h | 39 +- vp9/decoder/vp9_dboolhuff.h | 6 +- vp9/decoder/vp9_decodemv.h | 4 + vp9/decoder/vp9_decodframe.c | 24 +- vp9/decoder/vp9_decodframe.h | 2 +- vp9/decoder/vp9_dequantize.c | 28 +- vp9/decoder/vp9_dequantize.h | 138 +++--- vp9/decoder/vp9_detokenize.c | 12 +- vp9/decoder/vp9_detokenize.h | 2 +- vp9/decoder/vp9_idct_blk.c | 120 +++--- vp9/{common => decoder}/vp9_onyxd.h | 7 +- vp9/decoder/vp9_onyxd_if.c | 4 +- vp9/decoder/vp9_onyxd_int.h | 6 +- vp9/decoder/vp9_reconintra_mt.h | 15 - vp9/decoder/vp9_treereader.h | 2 +- vp9/encoder/vp9_bitstream.h | 2 +- vp9/encoder/vp9_block.h | 59 ++- vp9/encoder/vp9_boolhuff.h | 2 +- vp9/encoder/vp9_encodeframe.c | 8 +- vp9/encoder/vp9_encodeframe.h | 2 +- vp9/encoder/vp9_encodeintra.h | 2 +- vp9/encoder/vp9_encodemb.c | 64 +-- vp9/encoder/vp9_encodemb.h | 31 +- vp9/encoder/vp9_encodemv.h | 3 +- vp9/encoder/vp9_firstpass.c | 8 +- vp9/encoder/vp9_firstpass.h | 5 +- vp9/encoder/vp9_lookahead.h | 5 +- vp9/encoder/vp9_mbgraph.h | 2 +- vp9/encoder/vp9_mcomp.c | 106 ++--- vp9/encoder/vp9_mcomp.h | 2 +- vp9/encoder/vp9_modecosts.h | 2 +- vp9/encoder/vp9_onyx_if.c | 37 +- vp9/encoder/vp9_onyx_int.h | 3 +- vp9/encoder/vp9_picklpf.c | 6 +- vp9/encoder/vp9_picklpf.h | 2 +- vp9/encoder/vp9_psnr.h | 2 +- vp9/encoder/vp9_quantize.c | 126 +++--- vp9/encoder/vp9_quantize.h | 2 +- vp9/encoder/vp9_ratectrl.h | 2 +- vp9/encoder/vp9_rdopt.c | 49 +-- vp9/encoder/vp9_rdopt.h | 2 +- vp9/encoder/vp9_sad_c.c | 407 +++++++++--------- vp9/encoder/vp9_satd_c.c | 11 +- 
vp9/encoder/vp9_segmentation.h | 2 +- vp9/encoder/vp9_ssim.c | 10 +- vp9/encoder/vp9_temporal_filter.c | 92 ++-- vp9/encoder/vp9_temporal_filter.h | 2 +- vp9/encoder/vp9_tokenize.h | 13 +- vp9/encoder/vp9_treewriter.h | 2 +- vp9/encoder/vp9_variance.h | 39 +- vp9/encoder/vp9_variance_c.c | 124 +++--- vp9/vp9_common.mk | 1 - vp9/vp9_dx_iface.c | 2 +- vpx_scale/yv12config.h | 21 +- 108 files changed, 1884 insertions(+), 1969 deletions(-) delete mode 100644 vp9/common/vp9_common_types.h rename vp9/{common => decoder}/vp9_onyxd.h (97%) delete mode 100644 vp9/decoder/vp9_reconintra_mt.h diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h index 3a37dc3bb..a784cb746 100644 --- a/vp9/common/vp9_alloccommon.h +++ b/vp9/common/vp9_alloccommon.h @@ -23,4 +23,4 @@ void vp9_setup_version(VP9_COMMON *oci); void vp9_update_mode_info_border(VP9_COMMON *cpi, MODE_INFO *mi_base); void vp9_update_mode_info_in_image(VP9_COMMON *cpi, MODE_INFO *mi); -#endif +#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 4c88a4fa2..b8ad433dc 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -12,7 +12,7 @@ #include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" -const unsigned char vp9_block2left[TX_SIZE_MAX_SB][25] = { +const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25] = { {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, @@ -20,7 +20,7 @@ const unsigned char vp9_block2left[TX_SIZE_MAX_SB][25] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} #endif }; -const unsigned char vp9_block2above[TX_SIZE_MAX_SB][25] = { +const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25] = { {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8}, {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 4306eb02a..ad5f3b36c 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -279,19 +279,19 @@ typedef struct { } MODE_INFO; typedef struct blockd { - short *qcoeff; - short *dqcoeff; - unsigned char *predictor; - short *diff; - short *dequant; + int16_t *qcoeff; + int16_t *dqcoeff; + uint8_t *predictor; + int16_t *diff; + int16_t *dequant; /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ - unsigned char **base_pre; - unsigned char **base_second_pre; + uint8_t **base_pre; + uint8_t **base_second_pre; int pre; int pre_stride; - unsigned char **base_dst; + uint8_t **base_dst; int dst; int dst_stride; @@ -303,18 +303,18 @@ typedef struct blockd { #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS typedef struct superblockd { /* 32x32 Y and 16x16 U/V. No 2nd order transform yet. 
*/ - DECLARE_ALIGNED(16, short, diff[32*32+16*16*2]); - DECLARE_ALIGNED(16, short, qcoeff[32*32+16*16*2]); - DECLARE_ALIGNED(16, short, dqcoeff[32*32+16*16*2]); + DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]); + DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]); } SUPERBLOCKD; #endif typedef struct macroblockd { - DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */ - DECLARE_ALIGNED(16, unsigned char, predictor[384]); - DECLARE_ALIGNED(16, short, qcoeff[400]); - DECLARE_ALIGNED(16, short, dqcoeff[400]); - DECLARE_ALIGNED(16, unsigned short, eobs[25]); + DECLARE_ALIGNED(16, int16_t, diff[400]); /* from idct diff */ + DECLARE_ALIGNED(16, uint8_t, predictor[384]); + DECLARE_ALIGNED(16, int16_t, qcoeff[400]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[400]); + DECLARE_ALIGNED(16, uint16_t, eobs[25]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS SUPERBLOCKD sb_coeff_data; @@ -389,10 +389,10 @@ typedef struct macroblockd { unsigned int frames_till_alt_ref_frame; /* Inverse transform function pointers. */ - void (*inv_xform4x4_1_x8)(short *input, short *output, int pitch); - void (*inv_xform4x4_x8)(short *input, short *output, int pitch); - void (*inv_walsh4x4_1)(short *in, short *out); - void (*inv_walsh4x4_lossless)(short *in, short *out); + void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch); + void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch); + void (*inv_walsh4x4_1)(int16_t *in, int16_t *out); + void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out); vp9_subpix_fn_t subpixel_predict; @@ -412,7 +412,7 @@ typedef struct macroblockd { * to keep a copy of the reference area. This buffer can be used for other * purpose. */ - DECLARE_ALIGNED(32, unsigned char, y_buf[22 * 32]); + DECLARE_ALIGNED(32, uint8_t, y_buf[22 * 32]); #endif int mb_index; // Index of the MB in the SB (0..3) @@ -502,8 +502,8 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { return tx_type; } -extern const unsigned char vp9_block2left[TX_SIZE_MAX_SB][25]; -extern const unsigned char vp9_block2above[TX_SIZE_MAX_SB][25]; +extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25]; +extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25]; #define USE_ADST_FOR_I16X16_8X8 0 #define USE_ADST_FOR_I16X16_4X4 0 @@ -665,4 +665,4 @@ static void update_blockd_bmi(MACROBLOCKD *xd) { } } } -#endif /* __INC_BLOCKD_H */ +#endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_coefupdateprobs.h b/vp9/common/vp9_coefupdateprobs.h index cd7eabfa5..ee250e048 100644 --- a/vp9/common/vp9_coefupdateprobs.h +++ b/vp9/common/vp9_coefupdateprobs.h @@ -8,9 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_COMMON_VP9_COEFUPDATEPROBS_H_ +#define VP9_COMMON_VP9_COEFUPDATEPROBS_H_ /* Update probabilities for the nodes in the token entropy tree. Generated file included by vp9_entropy.c */ #define COEF_UPDATE_PROB 252 #define COEF_UPDATE_PROB_8X8 252 #define COEF_UPDATE_PROB_16X16 252 + +#endif // VP9_COMMON_VP9_COEFUPDATEPROBS_H_ diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index d6887afa0..ee027585c 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree.
*/ - #ifndef VP9_COMMON_VP9_COMMON_H_ #define VP9_COMMON_VP9_COMMON_H_ @@ -18,28 +17,30 @@ #include "vpx_mem/vpx_mem.h" #include "vpx/vpx_integer.h" -#include "vp9/common/vp9_common_types.h" + +#define TRUE 1 +#define FALSE 0 /* Only need this for fixed-size arrays, for structs just assign. */ -#define vp9_copy( Dest, Src) { \ - assert( sizeof( Dest) == sizeof( Src)); \ - vpx_memcpy( Dest, Src, sizeof( Src)); \ +#define vp9_copy(Dest, Src) { \ + assert(sizeof(Dest) == sizeof(Src)); \ + vpx_memcpy(Dest, Src, sizeof(Src)); \ } /* Use this for variably-sized arrays. */ -#define vp9_copy_array( Dest, Src, N) { \ - assert( sizeof( *Dest) == sizeof( *Src)); \ - vpx_memcpy( Dest, Src, N * sizeof( *Src)); \ +#define vp9_copy_array(Dest, Src, N) { \ + assert(sizeof(*Dest) == sizeof(*Src)); \ + vpx_memcpy(Dest, Src, N * sizeof(*Src)); \ } -#define vp9_zero( Dest) vpx_memset( &Dest, 0, sizeof( Dest)); +#define vp9_zero(Dest) vpx_memset(&Dest, 0, sizeof(Dest)); -#define vp9_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest)); +#define vp9_zero_array(Dest, N) vpx_memset(Dest, 0, N * sizeof(*Dest)); static __inline uint8_t clip_pixel(int val) { return (val > 255) ? 255u : (val < 0) ? 0u : val; } -#endif /* common_h */ +#endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/vp9/common/vp9_common_types.h b/vp9/common/vp9_common_types.h deleted file mode 100644 index 8982898ca..000000000 --- a/vp9/common/vp9_common_types.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP9_COMMON_VP9_COMMON_TYPES_H_ -#define VP9_COMMON_VP9_COMMON_TYPES_H_ - -#define TRUE 1 -#define FALSE 0 - -#endif diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index afd517064..6968dcd76 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -1210,4 +1210,4 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { } } }; -#endif +#endif // CONFIG_SUPERBLOCKS && CONFIG_TX32X32 diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 27e90b66c..559757e81 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -17,18 +17,12 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymode.h" #include "vpx_mem/vpx_mem.h" - -#define uchar unsigned char /* typedefs can clash */ -#define uint unsigned int - -typedef const uchar cuchar; -typedef const uint cuint; - +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_coefupdateprobs.h" const int vp9_i8x8_block[4] = {0, 2, 8, 10}; -DECLARE_ALIGNED(16, const unsigned char, vp9_norm[256]) = { +DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -51,7 +45,7 @@ DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7 }; -DECLARE_ALIGNED(16, cuchar, vp9_prev_token_class[MAX_ENTROPY_TOKENS]) = { +DECLARE_ALIGNED(16, const uint8_t, vp9_prev_token_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0 }; diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index f55ab8c93..eb8cfe93c 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_ENTROPY_H_ #define VP9_COMMON_VP9_ENTROPY_H_ +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_treecoder.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" @@ -110,7 +110,8 @@ typedef vp9_prob vp9_coeff_probs[COEF_BANDS][PREV_COEF_CONTEXTS] #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ -extern DECLARE_ALIGNED(16, const unsigned char, vp9_prev_token_class[MAX_ENTROPY_TOKENS]); +extern DECLARE_ALIGNED(16, const uint8_t, + vp9_prev_token_class[MAX_ENTROPY_TOKENS]); struct VP9Common; void vp9_default_coef_probs(struct VP9Common *); @@ -135,4 +136,4 @@ static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } -#endif +#endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 78ec325b9..fe3ace6bc 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ #define VP9_COMMON_VP9_ENTROPYMODE_H_ @@ -114,4 +113,4 @@ extern struct vp9_token_struct vp9_switchable_interp_encodings extern const vp9_prob vp9_switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS - 1]; -#endif +#endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index 66126daf3..dcdd0ec53 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -126,4 +126,5 @@ void vp9_counts_to_nmv_context( unsigned int (*branch_ct_class0_hp)[2], unsigned int (*branch_ct_hp)[2]); void vp9_counts_process(nmv_context_counts *NMVcount, int usehp); -#endif + +#endif // VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/vp9/common/vp9_extend.c b/vp9/common/vp9_extend.c index 61c7abfc7..d3e66f696 100644 --- a/vp9/common/vp9_extend.c +++ b/vp9/common/vp9_extend.c @@ -11,9 +11,9 @@ #include "vp9/common/vp9_extend.h" #include "vpx_mem/vpx_mem.h" -static void copy_and_extend_plane(unsigned char *s, /* source */ +static void copy_and_extend_plane(uint8_t *s, /* source */ int sp, /* source pitch */ - unsigned char *d, /* destination */ + uint8_t *d, /* destination */ int dp, /* destination pitch */ int h, /* height */ int w, /* width */ @@ -22,8 +22,8 @@ static void copy_and_extend_plane(unsigned char *s, /* source */ int eb, /* extend bottom border */ int er) { /* extend right border */ int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; + uint8_t *src_ptr1, *src_ptr2; + uint8_t *dest_ptr1, *dest_ptr2; int linesize; /* copy the left and right most columns out */ @@ -143,8 +143,8 @@ void vp9_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, } /* note the extension is only for the last row, for intra prediction purpose */ -void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, - unsigned char *UPtr, unsigned char *VPtr) { +void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, uint8_t *YPtr, + uint8_t *UPtr, uint8_t *VPtr) { int i; YPtr += ybf->y_stride * 14; diff --git a/vp9/common/vp9_extend.h b/vp9/common/vp9_extend.h index 55036f86b..847c2c5b9 100644 --- a/vp9/common/vp9_extend.h +++ b/vp9/common/vp9_extend.h @@ -12,9 +12,10 @@ #define VP9_COMMON_VP9_EXTEND_H_ #include "vpx_scale/yv12config.h" +#include "vpx/vpx_integer.h" -void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, - unsigned char *UPtr, unsigned char *VPtr); +void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, uint8_t *YPtr, + uint8_t *UPtr, uint8_t *VPtr); void vp9_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst); @@ -24,4 +25,4 @@ void vp9_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, int srcy, int srcx, int srch, int srcw); -#endif // __INC_EXTEND_H +#endif // VP9_COMMON_VP9_EXTEND_H_ diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 54d45f5a3..2adbfe137 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -15,7 +15,7 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_common.h" -DECLARE_ALIGNED(16, const short, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = { { 128, 0 }, { 120, 8 }, { 112, 16 }, @@ -36,7 +36,7 @@ DECLARE_ALIGNED(16, const short, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = { #define FILTER_ALPHA 0 #define FILTER_ALPHA_SHARP 1 -DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { #if FILTER_ALPHA == 
0 /* Lagrangian interpolation filter */ { 0, 0, 0, 128, 0, 0, 0, 0}, @@ -82,7 +82,7 @@ DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = { #endif /* FILTER_ALPHA */ }; -DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { #if FILTER_ALPHA_SHARP == 1 /* dct based filter */ {0, 0, 0, 128, 0, 0, 0, 0}, @@ -122,7 +122,7 @@ DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { #endif /* FILTER_ALPHA_SHARP */ }; -DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = { +DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = { {0, 0, 128, 0, 0, 0}, {1, -5, 125, 8, -2, 1}, {1, -8, 122, 17, -5, 1}, @@ -141,13 +141,13 @@ DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = { {1, -2, 8, 125, -5, 1} }; -static void filter_block2d_first_pass_6(unsigned char *src_ptr, +static void filter_block2d_first_pass_6(uint8_t *src_ptr, int *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const short *vp9_filter) { + const int16_t *vp9_filter) { unsigned int i, j; int temp; @@ -173,13 +173,13 @@ static void filter_block2d_first_pass_6(unsigned char *src_ptr, } static void filter_block2d_second_pass_6(int *src_ptr, - unsigned char *output_ptr, + uint8_t *output_ptr, int output_pitch, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const short *vp9_filter) { + const int16_t *vp9_filter) { unsigned int i, j; int temp; @@ -214,13 +214,13 @@ static void filter_block2d_second_pass_6(int *src_ptr, * ((filter_result + dest + 1) >> 1) and stores that in the output. */ static void filter_block2d_second_pass_avg_6(int *src_ptr, - unsigned char *output_ptr, + uint8_t *output_ptr, int output_pitch, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const short *vp9_filter) { + const int16_t *vp9_filter) { unsigned int i, j; int temp; @@ -248,12 +248,12 @@ static void filter_block2d_second_pass_avg_6(int *src_ptr, } #define Interp_Extend 3 -static void filter_block2d_6(unsigned char *src_ptr, - unsigned char *output_ptr, +static void filter_block2d_6(uint8_t *src_ptr, + uint8_t *output_ptr, unsigned int src_pixels_per_line, int output_pitch, - const short *HFilter, - const short *VFilter) { + const int16_t *HFilter, + const int16_t *VFilter) { int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ @@ -265,14 +265,14 @@ static void filter_block2d_6(unsigned char *src_ptr, } -void vp9_sixtap_predict_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_sixtap_predict_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter; - const short *VFilter; + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ @@ -287,12 +287,12 @@ void vp9_sixtap_predict_c(unsigned char *src_ptr, * then averages that with the content already present in the output * ((filter_result + dest + 1) >> 1) and stores that in the output. 
*/ -static void filter_block2d_avg_6(unsigned char *src_ptr, - unsigned char *output_ptr, +static void filter_block2d_avg_6(uint8_t *src_ptr, + uint8_t *output_ptr, unsigned int src_pixels_per_line, int output_pitch, - const short *HFilter, - const short *VFilter) { + const int16_t *HFilter, + const int16_t *VFilter) { int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ @@ -305,17 +305,14 @@ static void filter_block2d_avg_6(unsigned char *src_ptr, output_pitch, 4, 4, 4, 4, VFilter); } -void vp9_sixtap_predict_avg_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; +void vp9_sixtap_predict_avg_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ @@ -324,17 +321,14 @@ void vp9_sixtap_predict_avg_c dst_pitch, HFilter, VFilter); } -void vp9_sixtap_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; +void vp9_sixtap_predict8x8_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */ int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */ @@ -351,17 +345,14 @@ void vp9_sixtap_predict8x8_c } -void vp9_sixtap_predict_avg8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; +void vp9_sixtap_predict_avg8x8_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */ int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */ @@ -376,17 +367,14 @@ void vp9_sixtap_predict_avg8x8_c filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); } -void vp9_sixtap_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; +void vp9_sixtap_predict8x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */ int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */ @@ -403,17 +391,14 @@ void vp9_sixtap_predict8x4_c } -void vp9_sixtap_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; +void vp9_sixtap_predict16x16_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; // 
int FData[(15+Interp_Extend*2)*24]; /* Temp data buffer used in filtering */ int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */ @@ -430,17 +415,14 @@ void vp9_sixtap_predict16x16_c } -void vp9_sixtap_predict_avg16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) { - const short *HFilter; - const short *VFilter; +void vp9_sixtap_predict_avg16x16_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; // int FData[(15+Interp_Extend*2)*24]; /* Temp data buffer used in filtering */ int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */ @@ -470,13 +452,13 @@ static const unsigned int filter_size_to_wh[][2] = { {16,16}, }; -static void filter_block2d_8_c(const unsigned char *src_ptr, - const unsigned int src_stride, - const short *HFilter, - const short *VFilter, +static void filter_block2d_8_c(const uint8_t *src_ptr, + const unsigned int src_stride, + const int16_t *HFilter, + const int16_t *VFilter, const filter_size_t filter_size, - unsigned char *dst_ptr, - unsigned int dst_stride) { + uint8_t *dst_ptr, + unsigned int dst_stride) { const unsigned int output_width = filter_size_to_wh[filter_size][0]; const unsigned int output_height = filter_size_to_wh[filter_size][1]; @@ -495,12 +477,12 @@ static void filter_block2d_8_c(const unsigned char *src_ptr, * = 23 * and filter_max_width = 16 */ - unsigned char intermediate_buffer[23 * 16]; + uint8_t intermediate_buffer[23 * 16]; const int intermediate_next_stride = 1 - intermediate_height * output_width; // Horizontal pass (src -> transposed intermediate). { - unsigned char *output_ptr = intermediate_buffer; + uint8_t *output_ptr = intermediate_buffer; const int src_next_row_stride = src_stride - output_width; unsigned int i, j; src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); @@ -529,7 +511,7 @@ static void filter_block2d_8_c(const unsigned char *src_ptr, // Vertical pass (transposed intermediate -> dst). 
{ - unsigned char *src_ptr = intermediate_buffer; + uint8_t *src_ptr = intermediate_buffer; const int dst_next_row_stride = dst_stride - output_width; unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -555,53 +537,53 @@ static void filter_block2d_8_c(const unsigned char *src_ptr, } } -void vp9_filter_block2d_4x4_8_c(const unsigned char *src_ptr, +void vp9_filter_block2d_4x4_8_c(const uint8_t *src_ptr, const unsigned int src_stride, - const short *HFilter_aligned16, - const short *VFilter_aligned16, - unsigned char *dst_ptr, + const int16_t *HFilter_aligned16, + const int16_t *VFilter_aligned16, + uint8_t *dst_ptr, unsigned int dst_stride) { filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_4x4, dst_ptr, dst_stride); } -void vp9_filter_block2d_8x4_8_c(const unsigned char *src_ptr, +void vp9_filter_block2d_8x4_8_c(const uint8_t *src_ptr, const unsigned int src_stride, - const short *HFilter_aligned16, - const short *VFilter_aligned16, - unsigned char *dst_ptr, + const int16_t *HFilter_aligned16, + const int16_t *VFilter_aligned16, + uint8_t *dst_ptr, unsigned int dst_stride) { filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_8x4, dst_ptr, dst_stride); } -void vp9_filter_block2d_8x8_8_c(const unsigned char *src_ptr, +void vp9_filter_block2d_8x8_8_c(const uint8_t *src_ptr, const unsigned int src_stride, - const short *HFilter_aligned16, - const short *VFilter_aligned16, - unsigned char *dst_ptr, + const int16_t *HFilter_aligned16, + const int16_t *VFilter_aligned16, + uint8_t *dst_ptr, unsigned int dst_stride) { filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_8x8, dst_ptr, dst_stride); } -void vp9_filter_block2d_16x16_8_c(const unsigned char *src_ptr, +void vp9_filter_block2d_16x16_8_c(const uint8_t *src_ptr, const unsigned int src_stride, - const short *HFilter_aligned16, - const short *VFilter_aligned16, - unsigned char *dst_ptr, + const int16_t *HFilter_aligned16, + const int16_t *VFilter_aligned16, + uint8_t *dst_ptr, unsigned int dst_stride) { filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_16x16, dst_ptr, dst_stride); } -static void block2d_average_c(unsigned char *src, - unsigned int src_stride, - unsigned char *output_ptr, +static void block2d_average_c(uint8_t *src, + unsigned int src_stride, + uint8_t *output_ptr, unsigned int output_stride, const filter_size_t filter_size) { const unsigned int output_width = filter_size_to_wh[filter_size][0]; @@ -618,14 +600,14 @@ static void block2d_average_c(unsigned char *src, #define block2d_average block2d_average_c -void vp9_eighttap_predict_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_eighttap_predict_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter; - const short *VFilter; + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_sub_pel_filters_8[xoffset]; VFilter = vp9_sub_pel_filters_8[yoffset]; @@ -635,15 +617,15 @@ void vp9_eighttap_predict_c(unsigned char *src_ptr, dst_ptr, dst_pitch); } -void vp9_eighttap_predict_avg4x4_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_eighttap_predict_avg4x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter = 
vp9_sub_pel_filters_8[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8[yoffset]; - unsigned char tmp[4 * 4]; + const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset]; + uint8_t tmp[4 * 4]; vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, @@ -651,14 +633,14 @@ void vp9_eighttap_predict_avg4x4_c(unsigned char *src_ptr, block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4); } -void vp9_eighttap_predict_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_eighttap_predict_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter; - const short *VFilter; + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_sub_pel_filters_8s[xoffset]; VFilter = vp9_sub_pel_filters_8s[yoffset]; @@ -668,15 +650,15 @@ void vp9_eighttap_predict_sharp_c(unsigned char *src_ptr, dst_ptr, dst_pitch); } -void vp9_eighttap_predict_avg4x4_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_eighttap_predict_avg4x4_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8s[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8s[yoffset]; - unsigned char tmp[4 * 4]; + const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset]; + uint8_t tmp[4 * 4]; vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, @@ -684,43 +666,43 @@ void vp9_eighttap_predict_avg4x4_sharp_c(unsigned char *src_ptr, block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4); } -void vp9_eighttap_predict8x8_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8[yoffset]; +void vp9_eighttap_predict8x8_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset]; vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } -void vp9_eighttap_predict8x8_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8s[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8s[yoffset]; +void vp9_eighttap_predict8x8_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset]; vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } -void vp9_eighttap_predict_avg8x8_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char tmp[8 * 8]; - const short *HFilter = vp9_sub_pel_filters_8[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8[yoffset]; +void vp9_eighttap_predict_avg8x8_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int 
yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + uint8_t tmp[8 * 8]; + const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset]; vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, @@ -728,15 +710,15 @@ void vp9_eighttap_predict_avg8x8_c(unsigned char *src_ptr, block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8); } -void vp9_eighttap_predict_avg8x8_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char tmp[8 * 8]; - const short *HFilter = vp9_sub_pel_filters_8s[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8s[yoffset]; +void vp9_eighttap_predict_avg8x8_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + uint8_t tmp[8 * 8]; + const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset]; vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, @@ -744,71 +726,71 @@ void vp9_eighttap_predict_avg8x8_sharp_c(unsigned char *src_ptr, block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8); } -void vp9_eighttap_predict8x4_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8[yoffset]; +void vp9_eighttap_predict8x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset]; vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } -void vp9_eighttap_predict8x4_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8s[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8s[yoffset]; +void vp9_eighttap_predict8x4_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset]; vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } -void vp9_eighttap_predict16x16_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8[yoffset]; +void vp9_eighttap_predict16x16_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset]; vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } -void vp9_eighttap_predict16x16_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter = vp9_sub_pel_filters_8s[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8s[yoffset]; +void vp9_eighttap_predict16x16_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + 
uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset]; vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } -void vp9_eighttap_predict_avg16x16_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 16 * 16); - const short *HFilter = vp9_sub_pel_filters_8[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8[yoffset]; +void vp9_eighttap_predict_avg16x16_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16); + const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset]; vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter, @@ -816,15 +798,15 @@ void vp9_eighttap_predict_avg16x16_c(unsigned char *src_ptr, block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16); } -void vp9_eighttap_predict_avg16x16_sharp_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 16 * 16); - const short *HFilter = vp9_sub_pel_filters_8s[xoffset]; - const short *VFilter = vp9_sub_pel_filters_8s[yoffset]; +void vp9_eighttap_predict_avg16x16_sharp_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16); + const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset]; + const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset]; vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter, @@ -854,12 +836,12 @@ void vp9_eighttap_predict_avg16x16_sharp_c(unsigned char *src_ptr, * Two filter taps should sum to VP9_FILTER_WEIGHT. * ****************************************************************************/ -static void filter_block2d_bil_first_pass(unsigned char *src_ptr, - unsigned short *dst_ptr, - unsigned int src_stride, - unsigned int height, - unsigned int width, - const short *vp9_filter) { +static void filter_block2d_bil_first_pass(uint8_t *src_ptr, + uint16_t *dst_ptr, + unsigned int src_stride, + unsigned int height, + unsigned int width, + const int16_t *vp9_filter) { unsigned int i, j; for (i = 0; i < height; i++) { @@ -899,13 +881,13 @@ static void filter_block2d_bil_first_pass(unsigned char *src_ptr, * Two filter taps should sum to VP9_FILTER_WEIGHT. * ****************************************************************************/ -static void filter_block2d_bil_second_pass(unsigned short *src_ptr, - unsigned char *dst_ptr, - int dst_pitch, - unsigned int height, - unsigned int width, - const short *vp9_filter) { - unsigned int i, j; +static void filter_block2d_bil_second_pass(uint16_t *src_ptr, + uint8_t *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const int16_t *vp9_filter) { + unsigned int i, j; int temp; for (i = 0; i < height; i++) { @@ -932,13 +914,13 @@ static void filter_block2d_bil_second_pass(unsigned short *src_ptr, * with the values already present in the output and stores the result of * that back into the output ((filter_result + dest + 1) >> 1). 
*/ -static void filter_block2d_bil_second_pass_avg(unsigned short *src_ptr, - unsigned char *dst_ptr, - int dst_pitch, - unsigned int height, - unsigned int width, - const short *vp9_filter) { - unsigned int i, j; +static void filter_block2d_bil_second_pass_avg(uint16_t *src_ptr, + uint8_t *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const int16_t *vp9_filter) { + unsigned int i, j; int temp; for (i = 0; i < height; i++) { @@ -979,16 +961,16 @@ static void filter_block2d_bil_second_pass_avg(unsigned short *src_ptr, * SPECIAL NOTES : The largest block size can be handled here is 16x16 * ****************************************************************************/ -static void filter_block2d_bil(unsigned char *src_ptr, - unsigned char *dst_ptr, - unsigned int src_pitch, - unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, - int Width, - int Height) { +static void filter_block2d_bil(uint8_t *src_ptr, + uint8_t *dst_ptr, + unsigned int src_pitch, + unsigned int dst_pitch, + const int16_t *HFilter, + const int16_t *VFilter, + int Width, + int Height) { - unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */ + uint16_t FData[17 * 16]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); @@ -997,15 +979,15 @@ static void filter_block2d_bil(unsigned char *src_ptr, filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); } -static void filter_block2d_bil_avg(unsigned char *src_ptr, - unsigned char *dst_ptr, - unsigned int src_pitch, - unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, - int Width, - int Height) { - unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */ +static void filter_block2d_bil_avg(uint8_t *src_ptr, + uint8_t *dst_ptr, + unsigned int src_pitch, + unsigned int dst_pitch, + const int16_t *HFilter, + const int16_t *VFilter, + int Width, + int Height) { + uint16_t FData[17 * 16]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... 
*/ filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); @@ -1014,14 +996,14 @@ static void filter_block2d_bil_avg(unsigned char *src_ptr, filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter); } -void vp9_bilinear_predict4x4_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_bilinear_predict4x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter; - const short *VFilter; + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -1029,14 +1011,14 @@ void vp9_bilinear_predict4x4_c(unsigned char *src_ptr, filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); } -void vp9_bilinear_predict_avg4x4_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, +void vp9_bilinear_predict_avg4x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, int dst_pitch) { - const short *HFilter; - const short *VFilter; + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -1045,14 +1027,14 @@ void vp9_bilinear_predict_avg4x4_c(unsigned char *src_ptr, dst_pitch, HFilter, VFilter, 4, 4); } -void vp9_bilinear_predict8x8_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter; - const short *VFilter; +void vp9_bilinear_predict8x8_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -1061,14 +1043,14 @@ void vp9_bilinear_predict8x8_c(unsigned char *src_ptr, } -void vp9_bilinear_predict_avg8x8_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter; - const short *VFilter; +void vp9_bilinear_predict_avg8x8_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -1077,14 +1059,14 @@ void vp9_bilinear_predict_avg8x8_c(unsigned char *src_ptr, dst_pitch, HFilter, VFilter, 8, 8); } -void vp9_bilinear_predict8x4_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter; - const short *VFilter; +void vp9_bilinear_predict8x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -1093,14 +1075,14 @@ void vp9_bilinear_predict8x4_c(unsigned char *src_ptr, } -void vp9_bilinear_predict16x16_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter; - const short *VFilter; +void vp9_bilinear_predict16x16_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int 
yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -1108,14 +1090,14 @@ void vp9_bilinear_predict16x16_c(unsigned char *src_ptr, filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); } -void vp9_bilinear_predict_avg16x16_c(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - const short *HFilter; - const short *VFilter; +void vp9_bilinear_predict_avg16x16_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { + const int16_t *HFilter; + const int16_t *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h index 545d39a8a..807a6b2ec 100644 --- a/vp9/common/vp9_filter.h +++ b/vp9/common/vp9_filter.h @@ -13,6 +13,7 @@ #include "vpx_config.h" #include "vpx_scale/yv12config.h" +#include "vpx/vpx_integer.h" #define BLOCK_HEIGHT_WIDTH 4 #define VP9_FILTER_WEIGHT 128 @@ -20,9 +21,9 @@ #define SUBPEL_SHIFTS 16 -extern const short vp9_bilinear_filters[SUBPEL_SHIFTS][2]; -extern const short vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]; -extern const short vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]; -extern const short vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]; +extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][2]; +extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]; +extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]; +extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]; -#endif // FILTER_H +#endif // VP9_COMMON_VP9_FILTER_H_ diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 903d9047e..b5d6bda4d 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -14,7 +14,7 @@ #include "vp9/common/vp9_subpelvar.h" #include -const unsigned char vp9_mbsplit_offset[4][16] = { +const uint8_t vp9_mbsplit_offset[4][16] = { { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, @@ -42,23 +42,23 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, } #define SP(x) (((x) & 7) << 1) -unsigned int vp9_sad3x16_c(const unsigned char *src_ptr, +unsigned int vp9_sad3x16_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 3, 16); } -unsigned int vp9_sad16x3_c(const unsigned char *src_ptr, +unsigned int vp9_sad16x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); } #if CONFIG_SUBPELREFMV -unsigned int vp9_variance2x16_c(const unsigned char *src_ptr, +unsigned int vp9_variance2x16_c(const uint8_t *src_ptr, const int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, const int recon_stride, unsigned int *sse) { int sum; @@ -66,9 +66,9 @@ unsigned int vp9_variance2x16_c(const unsigned char *src_ptr, return (*sse - (((unsigned int)sum * sum) >> 5)); } -unsigned int vp9_variance16x2_c(const unsigned char *src_ptr, +unsigned int vp9_variance16x2_c(const uint8_t *src_ptr, const int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, const int recon_stride, unsigned 
int *sse) { int sum; @@ -76,16 +76,16 @@ unsigned int vp9_variance16x2_c(const unsigned char *src_ptr, return (*sse - (((unsigned int)sum * sum) >> 5)); } -unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance16x2_c(const uint8_t *src_ptr, const int src_pixels_per_line, const int xoffset, const int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, const int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[16 * 3]; // Temp data buffer used in filtering - unsigned char temp2[2 * 16]; - const short *HFilter, *VFilter; + uint16_t FData3[16 * 3]; // Temp data buffer used in filtering + uint8_t temp2[2 * 16]; + const int16_t *HFilter, *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -97,16 +97,16 @@ unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr, return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } -unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, const int src_pixels_per_line, const int xoffset, const int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, const int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[2 * 17]; // Temp data buffer used in filtering - unsigned char temp2[2 * 16]; - const short *HFilter, *VFilter; + uint16_t FData3[2 * 17]; // Temp data buffer used in filtering + uint8_t temp2[2 * 16]; + const int16_t *HFilter, *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -124,16 +124,16 @@ unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr, * score to use as ref motion vector */ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, - unsigned char *ref_y_buffer, + uint8_t *ref_y_buffer, int ref_y_stride, int_mv *mvlist, int_mv *nearest, int_mv *near) { int i, j; - unsigned char *above_src; - unsigned char *left_src; - unsigned char *above_ref; - unsigned char *left_ref; + uint8_t *above_src; + uint8_t *left_src; + uint8_t *above_ref; + uint8_t *left_ref; unsigned int score; #if CONFIG_SUBPELREFMV unsigned int sse; diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h index be55b2ad8..a66a7de27 100644 --- a/vp9/common/vp9_findnearmv.h +++ b/vp9/common/vp9_findnearmv.h @@ -22,7 +22,7 @@ * score to use as ref motion vector */ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, - unsigned char *ref_y_buffer, + uint8_t *ref_y_buffer, int ref_y_stride, int_mv *mvlist, int_mv *nearest, @@ -81,7 +81,7 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc, vp9_prob p[VP9_MVREFS - 1], const int context); -extern const unsigned char vp9_mbsplit_offset[4][16]; +extern const uint8_t vp9_mbsplit_offset[4][16]; static int left_block_mv(const MODE_INFO *cur_mb, int b) { if (!(b & 3)) { @@ -181,4 +181,4 @@ static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, return (cur_mb->bmi + b - 4)->as_mode.first; } -#endif +#endif // VP9_COMMON_VP9_FINDNEARMV_H_ diff --git a/vp9/common/vp9_header.h b/vp9/common/vp9_header.h index 00dd17e2d..c51ce135d 100644 --- a/vp9/common/vp9_header.h +++ b/vp9/common/vp9_header.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_COMMON_VP9_HEADER_H_ #define VP9_COMMON_VP9_HEADER_H_ @@ -38,5 +37,4 @@ typedef struct { #define VP9_HEADER_SIZE 3 #endif - -#endif +#endif // VP9_COMMON_VP9_HEADER_H_ diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 22c26ba12..7ce8cbee1 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -483,12 +483,12 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch, } } -void vp9_short_idct4x4llm_c(short *input, short *output, int pitch) { +void vp9_short_idct4x4llm_c(int16_t *input, int16_t *output, int pitch) { int i; int a1, b1, c1, d1; - short *ip = input; - short *op = output; + int16_t *ip = input; + int16_t *op = output; int temp1, temp2; int shortpitch = pitch >> 1; @@ -540,10 +540,10 @@ void vp9_short_idct4x4llm_c(short *input, short *output, int pitch) { } } -void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch) { +void vp9_short_idct4x4llm_1_c(int16_t *input, int16_t *output, int pitch) { int i; int a1; - short *op = output; + int16_t *op = output; int shortpitch = pitch >> 1; a1 = ((input[0] + 16) >> 5); for (i = 0; i < 4; i++) { @@ -555,8 +555,8 @@ void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch) { } } -void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, - unsigned char *dst_ptr, int pitch, int stride) { +void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride) { int a1 = ((input_dc + 16) >> 5); int r, c; @@ -570,11 +570,11 @@ void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, } } -void vp9_short_inv_walsh4x4_c(short *input, short *output) { +void vp9_short_inv_walsh4x4_c(int16_t *input, int16_t *output) { int i; int a1, b1, c1, d1; - short *ip = input; - short *op = output; + int16_t *ip = input; + int16_t *op = output; for (i = 0; i < 4; i++) { a1 = ((ip[0] + ip[3])); @@ -607,11 +607,11 @@ void vp9_short_inv_walsh4x4_c(short *input, short *output) { } } -void vp9_short_inv_walsh4x4_1_c(short *in, short *out) { +void vp9_short_inv_walsh4x4_1_c(int16_t *in, int16_t *out) { int i; - short tmp[4]; - short *ip = in; - short *op = tmp; + int16_t tmp[4]; + int16_t *ip = in; + int16_t *op = tmp; op[0] = (ip[0] + 1) >> 1; op[1] = op[2] = op[3] = (ip[0] >> 1); @@ -627,11 +627,11 @@ void vp9_short_inv_walsh4x4_1_c(short *in, short *out) { } #if CONFIG_LOSSLESS -void vp9_short_inv_walsh4x4_lossless_c(short *input, short *output) { +void vp9_short_inv_walsh4x4_lossless_c(int16_t *input, int16_t *output) { int i; int a1, b1, c1, d1; - short *ip = input; - short *op = output; + int16_t *ip = input; + int16_t *op = output; for (i = 0; i < 4; i++) { a1 = ((ip[0] + ip[3])) >> Y2_WHT_UPSCALE_FACTOR; @@ -667,11 +667,11 @@ void vp9_short_inv_walsh4x4_lossless_c(short *input, short *output) { } } -void vp9_short_inv_walsh4x4_1_lossless_c(short *in, short *out) { +void vp9_short_inv_walsh4x4_1_lossless_c(int16_t *in, int16_t *out) { int i; - short tmp[4]; - short *ip = in; - short *op = tmp; + int16_t tmp[4]; + int16_t *ip = in; + int16_t *op = tmp; op[0] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) + 1) >> 1; op[1] = op[2] = op[3] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) >> 1); @@ -686,11 +686,11 @@ void vp9_short_inv_walsh4x4_1_lossless_c(short *in, short *out) { } } -void vp9_short_inv_walsh4x4_x8_c(short *input, short *output, int pitch) { +void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) { int i; int a1, b1, c1, d1; - short *ip = input; - short *op = output; + int16_t *ip = input; + int16_t *op = 
output; int shortpitch = pitch >> 1; for (i = 0; i < 4; i++) { @@ -727,11 +727,11 @@ void vp9_short_inv_walsh4x4_x8_c(short *input, short *output, int pitch) { } } -void vp9_short_inv_walsh4x4_1_x8_c(short *in, short *out, int pitch) { +void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) { int i; - short tmp[4]; - short *ip = in; - short *op = tmp; + int16_t tmp[4]; + int16_t *ip = in; + int16_t *op = tmp; int shortpitch = pitch >> 1; op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1; @@ -748,8 +748,8 @@ void vp9_short_inv_walsh4x4_1_x8_c(short *in, short *out, int pitch) { } } -void vp9_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr, - unsigned char *dst_ptr, +void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride) { int r, c; short tmp[16]; @@ -767,13 +767,13 @@ void vp9_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr, #endif void vp9_dc_only_idct_add_8x8_c(short input_dc, - unsigned char *pred_ptr, - unsigned char *dst_ptr, + uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride) { int a1 = ((input_dc + 16) >> 5); int r, c, b; - unsigned char *orig_pred = pred_ptr; - unsigned char *orig_dst = dst_ptr; + uint8_t *orig_pred = pred_ptr; + uint8_t *orig_dst = dst_ptr; for (b = 0; b < 4; b++) { for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { @@ -911,7 +911,7 @@ static void idctcol(int *blk) { } #define TX_DIM 8 -void vp9_short_idct8x8_c(short *coefs, short *block, int pitch) { +void vp9_short_idct8x8_c(int16_t *coefs, int16_t *block, int pitch) { int X[TX_DIM * TX_DIM]; int i, j; int shortpitch = pitch >> 1; @@ -1030,7 +1030,7 @@ static void idctcol10(int *blk) { blk[8 * 7] = (x7 - x1) >> 14; } -void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) { +void vp9_short_idct10_8x8_c(int16_t *coefs, int16_t *block, int pitch) { int X[TX_DIM * TX_DIM]; int i, j; int shortpitch = pitch >> 1; @@ -1043,7 +1043,7 @@ void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) { } /* Do first 4 row idct only since non-zero dct coefficients are all in - * upper-left 4x4 area. */ + * upper-left 4x4 area. */ for (i = 0; i < 4; i++) idctrow10(X + 8 * i); @@ -1057,10 +1057,10 @@ void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) { } } -void vp9_short_ihaar2x2_c(short *input, short *output, int pitch) { +void vp9_short_ihaar2x2_c(int16_t *input, int16_t *output, int pitch) { int i; - short *ip = input; // 0,1, 4, 8 - short *op = output; + int16_t *ip = input; // 0, 1, 4, 8 + int16_t *op = output; for (i = 0; i < 16; i++) { op[i] = 0; } @@ -1074,7 +1074,7 @@ void vp9_short_ihaar2x2_c(short *input, short *output, int pitch) { #if 0 // Keep a really bad float version as reference for now. 
-void vp9_short_idct16x16_c(short *input, short *output, int pitch) { +void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; { @@ -1334,7 +1334,7 @@ void reference_16x16_idct_1d(double input[16], double output[16]) { } #endif -void vp9_short_idct16x16_c(short *input, short *output, int pitch) { +void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; { @@ -2069,7 +2069,7 @@ static void butterfly_32_idct_1d(double *input, double *output, int stride) { output[stride*31] = step2[ 0] - step2[(31 - 15)]; } -void vp9_short_idct32x32_c(short *input, short *output, int pitch) { +void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; { double out[32*32], out2[32*32]; @@ -2109,9 +2109,9 @@ void vp9_short_idct32x32_c(short *input, short *output, int pitch) { #if DWT_TYPE == 53 // Note: block length must be even for this implementation -static void synthesis_53_row(int length, short *lowpass, short *highpass, - short *x) { - short r, *a, *b; +static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass, + int16_t *x) { + int16_t r, *a, *b; int n; n = length >> 1; @@ -2134,9 +2134,9 @@ static void synthesis_53_row(int length, short *lowpass, short *highpass, *x++ = *b + ((r + 1) >> 1); } -static void synthesis_53_col(int length, short *lowpass, short *highpass, - short *x) { - short r, *a, *b; +static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass, + int16_t *x) { + int16_t r, *a, *b; int n; n = length >> 1; @@ -2160,8 +2160,8 @@ static void synthesis_53_col(int length, short *lowpass, short *highpass, *x++ = ((*b) << 1) + *a; } -void dyadic_synthesize_53(int levels, int width, int height, short *c, - int pitch_c, short *x, int pitch_x) { +void dyadic_synthesize_53(int levels, int width, int height, int16_t *c, + int pitch_c, int16_t *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; short buffer[2 * DWT_MAX_LENGTH]; @@ -2201,9 +2201,9 @@ void dyadic_synthesize_53(int levels, int width, int height, short *c, #elif DWT_TYPE == 26 // Note: block length must be even for this implementation -static void synthesis_26_row(int length, short *lowpass, short *highpass, - short *x) { - short r, s, *a, *b; +static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass, + int16_t *x) { + int16_t r, s, *a, *b; int i, n = length >> 1; if (n >= 4) { @@ -2226,9 +2226,9 @@ static void synthesis_26_row(int length, short *lowpass, short *highpass, } } -static void synthesis_26_col(int length, short *lowpass, short *highpass, - short *x) { - short r, s, *a, *b; +static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass, + int16_t *x) { + int16_t r, s, *a, *b; int i, n = length >> 1; if (n >= 4) { @@ -2251,10 +2251,10 @@ static void synthesis_26_col(int length, short *lowpass, short *highpass, } } -void dyadic_synthesize_26(int levels, int width, int height, short *c, - int pitch_c, short *x, int pitch_x) { +void dyadic_synthesize_26(int levels, int width, int height, int16_t *c, + int pitch_c, int16_t *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; - short buffer[2 * DWT_MAX_LENGTH]; + int16_t buffer[2 * DWT_MAX_LENGTH]; th[0] = hh; tw[0] = hw; @@ -2331,8 +2331,8 @@ static void synthesis_97(int length, double *lowpass, double *highpass, x[length - 1] -= 
2 * a_predict1 * x[length - 2]; } -void dyadic_synthesize_97(int levels, int width, int height, short *c, - int pitch_c, short *x, int pitch_x) { +void dyadic_synthesize_97(int levels, int width, int height, int16_t *c, + int pitch_c, int16_t *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; double buffer[2 * DWT_MAX_LENGTH]; double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; @@ -2578,7 +2578,7 @@ static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -void vp9_short_idct16x16_c_f(short *input, short *output, int pitch) { +void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; { double out[16*16], out2[16*16]; @@ -2608,12 +2608,12 @@ void vp9_short_idct16x16_c_f(short *input, short *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -void vp9_short_idct32x32_c(short *input, short *output, int pitch) { +void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { // assume out is a 32x32 buffer // Temporary buffer to hold a 16x16 block for 16x16 inverse dct - short buffer[16 * 16]; + int16_t buffer[16 * 16]; // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt - short buffer2[32 * 32]; + int16_t buffer2[32 * 32]; // Note: pitch is in bytes, short_pitch is in short units const int short_pitch = pitch >> 1; int i; diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index 3abf32894..bb992a138 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -72,7 +72,7 @@ void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd) { vp9_inverse_transform_mbuv_4x4(xd); } -void vp9_inverse_transform_b_8x8(short *input_dqcoeff, short *output_coeff, +void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, int16_t *output_coeff, int pitch) { vp9_short_idct8x8(input_dqcoeff, output_coeff, pitch); } @@ -123,8 +123,8 @@ void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd) { vp9_inverse_transform_mbuv_8x8(xd); } -void vp9_inverse_transform_b_16x16(short *input_dqcoeff, - short *output_coeff, int pitch) { +void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, + int16_t *output_coeff, int pitch) { vp9_short_idct16x16(input_dqcoeff, output_coeff, pitch); } diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index 94593f8cc..586a3dc4b 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -12,6 +12,7 @@ #define VP9_COMMON_VP9_INVTRANS_H_ #include "vpx_ports/config.h" +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch); @@ -22,8 +23,8 @@ extern void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd); extern void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd); -extern void vp9_inverse_transform_b_8x8(short *input_dqcoeff, - short *output_coeff, int pitch); +extern void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, + int16_t *output_coeff, int pitch); extern void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd); @@ -31,8 +32,8 @@ extern void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd); extern void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd); -extern void vp9_inverse_transform_b_16x16(short *input_dqcoeff, - short *output_coeff, int pitch); +extern void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, + int16_t *output_coeff, int pitch); extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); @@ -43,4 +44,4 @@ 
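
Note on the conversions above: they replace bare `short` with the fixed-width `int16_t` that the coefficient math assumes, and the 32x32 inverse transform documents that its `pitch` argument is in bytes while element offsets are in `int16_t` units (`const int short_pitch = pitch >> 1;`). A minimal sketch of that convention; `scale_block` is a hypothetical helper, not part of the patch:

    #include <stdint.h>

    /* Hypothetical illustration: walk an int16_t coefficient block whose
     * row pitch is given in bytes, as vp9_short_idct32x32_c expects. */
    static void scale_block(int16_t *coeffs, int pitch, int rows, int cols) {
      const int short_pitch = pitch >> 1;  /* bytes -> int16_t elements */
      int r, c;
      for (r = 0; r < rows; r++)
        for (c = 0; c < cols; c++)
          coeffs[r * short_pitch + c] >>= 1;  /* any per-coefficient op */
    }
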
extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); #endif -#endif // __INC_INVTRANS_H +#endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 1139fb5d1..a928a9268 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -188,7 +188,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { int filter_level; - unsigned char *y_ptr, *u_ptr, *v_ptr; + uint8_t *y_ptr, *u_ptr, *v_ptr; /* Point at base of Mb MODE_INFO list */ const MODE_INFO *mode_info_context = cm->mi; @@ -325,7 +325,7 @@ void vp9_loop_filter_frame_yonly(VP9_COMMON *cm, MACROBLOCKD *xd, int default_filt_lvl) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; - unsigned char *y_ptr; + uint8_t *y_ptr; int mb_row; int mb_col; @@ -425,7 +425,7 @@ void vp9_loop_filter_partial_frame(VP9_COMMON *cm, MACROBLOCKD *xd, int default_filt_lvl) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; - unsigned char *y_ptr; + uint8_t *y_ptr; int mb_row; int mb_col; int mb_cols = post->y_width >> 4; diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index 23df2d86d..f05dabf08 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -49,26 +49,26 @@ struct loop_filter_info { }; #define prototype_loopfilter(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ + void sym(uint8_t *src, int pitch, const unsigned char *blimit, \ const unsigned char *limit, const unsigned char *thresh, int count) #define prototype_loopfilter_block(sym) \ - void sym(unsigned char *y, unsigned char *u, unsigned char *v, \ + void sym(uint8_t *y, uint8_t *u, uint8_t *v, \ int ystride, int uv_stride, struct loop_filter_info *lfi) #define prototype_simple_loopfilter(sym) \ - void sym(unsigned char *y, int ystride, const unsigned char *blimit) + void sym(uint8_t *y, int ystride, const unsigned char *blimit) #if ARCH_X86 || ARCH_X86_64 #include "x86/vp9_loopfilter_x86.h" #endif -typedef void loop_filter_uvfunction(unsigned char *u, /* source pointer */ +typedef void loop_filter_uvfunction(uint8_t *u, /* source pointer */ int p, /* pitch */ const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, - unsigned char *v); + uint8_t *v); /* assorted loopfilter functions which get used elsewhere */ struct VP9Common; @@ -93,4 +93,4 @@ void vp9_loop_filter_frame_yonly(struct VP9Common *cm, void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl); -#endif // loopfilter_h +#endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index b8cfb9c1a..1419c9960 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -13,20 +13,20 @@ #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" -typedef unsigned char uc; - -static __inline signed char signed_char_clamp(int t) { +static __inline int8_t signed_char_clamp(int t) { t = (t < -128 ? -128 : t); t = (t > 127 ? 
127 : t); - return (signed char) t; + return (int8_t) t; } /* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static __inline signed char filter_mask(uc limit, uc blimit, - uc p3, uc p2, uc p1, uc p0, - uc q0, uc q1, uc q2, uc q3) { - signed char mask = 0; +static __inline int8_t filter_mask(uint8_t limit, uint8_t blimit, + uint8_t p3, uint8_t p2, + uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1, + uint8_t q2, uint8_t q3) { + int8_t mask = 0; mask |= (abs(p3 - p2) > limit) * -1; mask |= (abs(p2 - p1) > limit) * -1; mask |= (abs(p1 - p0) > limit) * -1; @@ -39,26 +39,25 @@ static __inline signed char filter_mask(uc limit, uc blimit, } /* is there high variance internal edge ( 11111111 yes, 00000000 no) */ -static __inline signed char hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) { - signed char hev = 0; +static __inline int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1) { + int8_t hev = 0; hev |= (abs(p1 - p0) > thresh) * -1; hev |= (abs(q1 - q0) > thresh) * -1; return hev; } -static __inline void filter(signed char mask, uc hev, uc *op1, - uc *op0, uc *oq0, uc *oq1) +static __inline void filter(int8_t mask, uint8_t hev, uint8_t *op1, + uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { + int8_t ps0, qs0; + int8_t ps1, qs1; + int8_t filter, Filter1, Filter2; + int8_t u; -{ - signed char ps0, qs0; - signed char ps1, qs1; - signed char filter, Filter1, Filter2; - signed char u; - - ps1 = (signed char) * op1 ^ 0x80; - ps0 = (signed char) * op0 ^ 0x80; - qs0 = (signed char) * oq0 ^ 0x80; - qs1 = (signed char) * oq1 ^ 0x80; + ps1 = (int8_t) *op1 ^ 0x80; + ps0 = (int8_t) *op0 ^ 0x80; + qs0 = (int8_t) *oq0 ^ 0x80; + qs1 = (int8_t) *oq1 ^ 0x80; /* add outer taps if we have high edge variance */ filter = signed_char_clamp(ps1 - qs1); @@ -91,20 +90,16 @@ static __inline void filter(signed char mask, uc hev, uc *op1, *oq1 = u ^ 0x80; u = signed_char_clamp(ps1 + filter); *op1 = u ^ 0x80; - } -void vp9_loop_filter_horizontal_edge_c -( - unsigned char *s, - int p, /* pitch */ - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) { - int hev = 0; /* high edge variance */ - signed char mask = 0; +void vp9_loop_filter_horizontal_edge_c(uint8_t *s, + int p, /* pitch */ + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count) { + int hev = 0; /* high edge variance */ + int8_t mask = 0; int i = 0; /* loop filter designed to work using chars so that we can make maximum use @@ -123,14 +118,14 @@ void vp9_loop_filter_horizontal_edge_c } while (++i < count * 8); } -void vp9_loop_filter_vertical_edge_c(unsigned char *s, +void vp9_loop_filter_vertical_edge_c(uint8_t *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { int hev = 0; /* high edge variance */ - signed char mask = 0; + int8_t mask = 0; int i = 0; /* loop filter designed to work using chars so that we can make maximum use @@ -148,10 +143,12 @@ void vp9_loop_filter_vertical_edge_c(unsigned char *s, s += p; } while (++i < count * 8); } -static __inline signed char flatmask(uc thresh, - uc p4, uc p3, uc p2, uc p1, uc p0, - uc q0, uc q1, uc q2, uc q3, uc q4) { - signed char flat = 0; +static __inline signed char flatmask(uint8_t thresh, + uint8_t p4, uint8_t p3, uint8_t p2, + uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1, uint8_t q2, + uint8_t q3, uint8_t q4) { + int8_t flat = 0; flat |= (abs(p1 - p0) > 1) * -1; flat |= (abs(q1 - q0) > 1) * -1; flat |= 
(abs(p0 - p2) > 1) * -1; @@ -164,16 +161,18 @@ static __inline signed char flatmask(uc thresh, return flat; } -static __inline void mbfilter(signed char mask, uc hev, uc flat, - uc *op4, uc *op3, uc *op2, uc *op1, uc *op0, - uc *oq0, uc *oq1, uc *oq2, uc *oq3, uc *oq4) { +static __inline void mbfilter(int8_t mask, uint8_t hev, uint8_t flat, + uint8_t *op4, uint8_t *op3, uint8_t *op2, + uint8_t *op1, uint8_t *op0, + uint8_t *oq0, uint8_t *oq1, uint8_t *oq2, + uint8_t *oq3, uint8_t *oq4) { /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */ if (flat && mask) { - unsigned char p0, q0; - unsigned char p1, q1; - unsigned char p2, q2; - unsigned char p3, q3; - unsigned char p4, q4; + uint8_t p0, q0; + uint8_t p1, q1; + uint8_t p2, q2; + uint8_t p3, q3; + uint8_t p4, q4; p4 = *op4; p3 = *op3; @@ -193,15 +192,15 @@ static __inline void mbfilter(signed char mask, uc hev, uc flat, *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q4 + 4) >> 3; *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q4 + q4 + 4) >> 3; } else { - signed char ps0, qs0; - signed char ps1, qs1; - signed char filter, Filter1, Filter2; - signed char u; + int8_t ps0, qs0; + int8_t ps1, qs1; + int8_t filter, Filter1, Filter2; + int8_t u; - ps1 = (signed char) * op1 ^ 0x80; - ps0 = (signed char) * op0 ^ 0x80; - qs0 = (signed char) * oq0 ^ 0x80; - qs1 = (signed char) * oq1 ^ 0x80; + ps1 = (int8_t) *op1 ^ 0x80; + ps0 = (int8_t) *op0 ^ 0x80; + qs0 = (int8_t) *oq0 ^ 0x80; + qs1 = (int8_t) *oq1 ^ 0x80; /* add outer taps if we have high edge variance */ filter = signed_char_clamp(ps1 - qs1); @@ -233,25 +232,22 @@ static __inline void mbfilter(signed char mask, uc hev, uc flat, *op1 = u ^ 0x80; } } -void vp9_mbloop_filter_horizontal_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) { - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - signed char flat = 0; + +void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count) { + int8_t hev = 0; /* high edge variance */ + int8_t mask = 0; + int8_t flat = 0; int i = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. 
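
The loop-filter helpers here return 0 or -1 (all bits set) in an `int8_t` so the result doubles as a byte mask for 8-bit SIMD, and `filter()` maps unsigned pixels into signed arithmetic by XORing with 0x80. Both ideas in isolation, as a standalone sketch with assumed values (not code from the patch):

    #include <stdint.h>
    #include <stdlib.h>

    /* 0 or -1 (0x00 / 0xff) result, usable directly as a SIMD byte mask. */
    static int8_t edge_mask(uint8_t limit, uint8_t p1, uint8_t p0,
                            uint8_t q0, uint8_t q1) {
      int8_t mask = 0;
      mask |= (abs(p1 - p0) > limit) * -1;
      mask |= (abs(q1 - q0) > limit) * -1;
      return mask;
    }

    /* XOR with 0x80 re-centers a uint8_t pixel as a signed int8_t value so
     * clamped signed filter taps can be applied; XOR again to undo. */
    static uint8_t filter_pixel(uint8_t px, int8_t adjust) {
      int8_t s = (int8_t)(px ^ 0x80);
      int v = s + adjust;
      if (v < -128) v = -128;   /* same clamp as signed_char_clamp() */
      if (v > 127) v = 127;
      return (uint8_t)((int8_t)v ^ 0x80);
    }
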
*/ do { - mask = filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]); @@ -269,22 +265,19 @@ void vp9_mbloop_filter_horizontal_edge_c } while (++i < count * 8); } -void vp9_mbloop_filter_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) { - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - signed char flat = 0; + +void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count) { + int8_t hev = 0; /* high edge variance */ + int8_t mask = 0; + int8_t flat = 0; int i = 0; do { - mask = filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); @@ -302,26 +295,26 @@ void vp9_mbloop_filter_vertical_edge_c } /* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static __inline signed char simple_filter_mask(uc blimit, - uc p1, uc p0, - uc q0, uc q1) { +static __inline int8_t simple_filter_mask(uint8_t blimit, + uint8_t p1, uint8_t p0, + uint8_t q0, uint8_t q1) { /* Why does this cause problems for win32? * error C2143: syntax error : missing ';' before 'type' * (void) limit; */ - signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; + int8_t mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; return mask; } -static __inline void simple_filter(signed char mask, - uc *op1, uc *op0, - uc *oq0, uc *oq1) { - signed char filter, Filter1, Filter2; - signed char p1 = (signed char) * op1 ^ 0x80; - signed char p0 = (signed char) * op0 ^ 0x80; - signed char q0 = (signed char) * oq0 ^ 0x80; - signed char q1 = (signed char) * oq1 ^ 0x80; - signed char u; +static __inline void simple_filter(int8_t mask, + uint8_t *op1, uint8_t *op0, + uint8_t *oq0, uint8_t *oq1) { + int8_t filter, Filter1, Filter2; + int8_t p1 = (int8_t) *op1 ^ 0x80; + int8_t p0 = (int8_t) *op0 ^ 0x80; + int8_t q0 = (int8_t) *oq0 ^ 0x80; + int8_t q1 = (int8_t) *oq1 ^ 0x80; + int8_t u; filter = signed_char_clamp(p1 - q1); filter = signed_char_clamp(filter + 3 * (q0 - p0)); @@ -339,13 +332,10 @@ static __inline void simple_filter(signed char mask, *op0 = u ^ 0x80; } -void vp9_loop_filter_simple_horizontal_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit -) { - signed char mask = 0; +void vp9_loop_filter_simple_horizontal_edge_c(uint8_t *s, + int p, + const unsigned char *blimit) { + int8_t mask = 0; int i = 0; do { @@ -359,13 +349,10 @@ void vp9_loop_filter_simple_horizontal_edge_c } while (++i < 16); } -void vp9_loop_filter_simple_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit -) { - signed char mask = 0; +void vp9_loop_filter_simple_vertical_edge_c(uint8_t *s, + int p, + const unsigned char *blimit) { + int8_t mask = 0; int i = 0; do { @@ -373,12 +360,11 @@ void vp9_loop_filter_simple_vertical_edge_c simple_filter(mask, s - 2, s - 1, s, s + 1); s += p; } while (++i < 16); - } /* Vertical MB Filtering */ -void vp9_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, +void vp9_loop_filter_mbv_c(uint8_t *y_ptr, uint8_t *u_ptr, + uint8_t *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { vp9_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); @@ -393,8 +379,8 @@ void vp9_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char 
*u_ptr,
 }
 
 /* Vertical B Filtering */
-void vp9_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
-                          unsigned char *v_ptr, int y_stride, int uv_stride,
+void vp9_loop_filter_bv_c(uint8_t *y_ptr, uint8_t *u_ptr,
+                          uint8_t *v_ptr, int y_stride, int uv_stride,
                           struct loop_filter_info *lfi) {
   vp9_loop_filter_vertical_edge_c(y_ptr + 4, y_stride,
                                   lfi->blim, lfi->lim, lfi->hev_thr, 2);
@@ -413,8 +399,8 @@ void vp9_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
 }
 
 /* Horizontal MB filtering */
-void vp9_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
-                           unsigned char *v_ptr, int y_stride, int uv_stride,
+void vp9_loop_filter_mbh_c(uint8_t *y_ptr, uint8_t *u_ptr,
+                           uint8_t *v_ptr, int y_stride, int uv_stride,
                            struct loop_filter_info *lfi) {
   vp9_mbloop_filter_horizontal_edge_c(y_ptr, y_stride,
                                       lfi->mblim, lfi->lim, lfi->hev_thr, 2);
@@ -429,8 +415,8 @@ void vp9_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
 }
 
 /* Horizontal B Filtering */
-void vp9_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
-                          unsigned char *v_ptr, int y_stride, int uv_stride,
+void vp9_loop_filter_bh_c(uint8_t *y_ptr, uint8_t *u_ptr,
+                          uint8_t *v_ptr, int y_stride, int uv_stride,
                           struct loop_filter_info *lfi) {
   vp9_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride,
                                     lfi->blim, lfi->lim, lfi->hev_thr, 2);
@@ -448,14 +434,14 @@ void vp9_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
                                     lfi->blim, lfi->lim, lfi->hev_thr, 1);
 }
 
-void vp9_loop_filter_bh8x8_c(unsigned char *y_ptr, unsigned char *u_ptr,
-                             unsigned char *v_ptr, int y_stride, int uv_stride,
+void vp9_loop_filter_bh8x8_c(uint8_t *y_ptr, uint8_t *u_ptr,
+                             uint8_t *v_ptr, int y_stride, int uv_stride,
                              struct loop_filter_info *lfi) {
   vp9_mbloop_filter_horizontal_edge_c(
     y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
 }
 
-void vp9_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
+void vp9_loop_filter_bhs_c(uint8_t *y_ptr, int y_stride,
                            const unsigned char *blimit) {
   vp9_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride,
                                            y_stride, blimit);
@@ -465,14 +451,14 @@ void vp9_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
                                            y_stride, blimit);
 }
 
-void vp9_loop_filter_bv8x8_c(unsigned char *y_ptr, unsigned char *u_ptr,
-                             unsigned char *v_ptr, int y_stride, int uv_stride,
+void vp9_loop_filter_bv8x8_c(uint8_t *y_ptr, uint8_t *u_ptr,
+                             uint8_t *v_ptr, int y_stride, int uv_stride,
                              struct loop_filter_info *lfi) {
   vp9_mbloop_filter_vertical_edge_c(
     y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
 }
 
-void vp9_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
+void vp9_loop_filter_bvs_c(uint8_t *y_ptr, int y_stride,
                            const unsigned char *blimit) {
   vp9_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
   vp9_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
index 84e473bb1..31162655d 100644
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -16,17 +16,13 @@ typedef enum {
   DEST = 1
 } BLOCKSET;
 
-static void setup_block
-(
-  BLOCKD *b,
-  int mv_stride,
-  unsigned char **base,
-  unsigned char **base2,
-  int Stride,
-  int offset,
-  BLOCKSET bs
-) {
-
+static void setup_block(BLOCKD *b,
+                        int mv_stride,
+                        uint8_t **base,
+                        uint8_t **base2,
+                        int Stride,
+                        int offset,
+                        BLOCKSET bs) {
   if (bs == DEST) {
     b->dst_stride = Stride;
     b->dst = offset;
@@ -37,15 +33,13 @@ static void setup_block
     b->base_pre = base;
     b->base_second_pre = base2;
   }
-
 }
 
-
 static void
setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) { int block; - unsigned char **y, **u, **v; - unsigned char **y2, **u2, **v2; + uint8_t **y, **u, **v; + uint8_t **y2, **u2, **v2; BLOCKD *blockd = xd->block; int stride; @@ -117,7 +111,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *xd) { } void vp9_build_block_doffsets(MACROBLOCKD *xd) { - /* handle the destination pitch features */ setup_macroblock(xd, DEST); setup_macroblock(xd, PRED); diff --git a/vp9/common/vp9_modecont.h b/vp9/common/vp9_modecont.h index 122eb12d6..24f1a6cb3 100644 --- a/vp9/common/vp9_modecont.h +++ b/vp9/common/vp9_modecont.h @@ -8,9 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_MODECONT_H_ #define VP9_COMMON_VP9_MODECONT_H_ extern const int vp9_default_mode_contexts[INTER_MODE_CONTEXTS][4]; -#endif + +#endif // VP9_COMMON_VP9_MODECONT_H_ diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h index f084b1104..8acd4046b 100644 --- a/vp9/common/vp9_mv.h +++ b/vp9/common/vp9_mv.h @@ -8,14 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_MV_H_ #define VP9_COMMON_VP9_MV_H_ + #include "vpx/vpx_integer.h" typedef struct { - short row; - short col; + int16_t row; + int16_t col; } MV; typedef union int_mv { @@ -23,4 +23,4 @@ typedef union int_mv { MV as_mv; } int_mv; /* facilitates faster equality tests and copies */ -#endif +#endif // VP9_COMMON_VP9_MV_H_ diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h index 1938352c4..ca6d89e91 100644 --- a/vp9/common/vp9_mvref_common.h +++ b/vp9/common/vp9_mvref_common.h @@ -11,18 +11,14 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_blockd.h" - #ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ #define VP9_COMMON_VP9_MVREF_COMMON_H_ -void vp9_find_mv_refs( - MACROBLOCKD *xd, - MODE_INFO *here, - MODE_INFO *lf_here, - MV_REFERENCE_FRAME ref_frame, - int_mv * mv_ref_list, - int *ref_sign_bias -); - -#endif +void vp9_find_mv_refs(MACROBLOCKD *xd, + MODE_INFO *here, + MODE_INFO *lf_here, + MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int *ref_sign_bias); +#endif // VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 0e157f1de..e4ad72f21 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_ONYX_H_ #define VP9_COMMON_VP9_ONYX_H_ @@ -221,4 +220,4 @@ extern "C" } #endif -#endif // __INC_ONYX_H +#endif // VP9_COMMON_VP9_ONYX_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index cc0878c9e..d96e76c86 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_COMMON_VP9_ONYXC_INT_H_ #define VP9_COMMON_VP9_ONYXC_INT_H_ @@ -142,9 +141,9 @@ typedef enum { typedef struct VP9Common { struct vpx_internal_error_info error; - DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, Y1dequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, Y2dequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, UVdequant[QINDEX_RANGE][16]); int Width; int Height; @@ -299,4 +298,4 @@ typedef struct VP9Common { } VP9_COMMON; -#endif // __INC_ONYX_INT_H +#endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index 820630251..80a952d84 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -132,20 +132,20 @@ const short vp9_rv[] = { /**************************************************************************** */ -void vp9_post_proc_down_and_across_c(unsigned char *src_ptr, - unsigned char *dst_ptr, +void vp9_post_proc_down_and_across_c(uint8_t *src_ptr, + uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit) { - unsigned char *p_src, *p_dst; + uint8_t *p_src, *p_dst; int row; int col; int i; int v; int pitch = src_pixels_per_line; - unsigned char d[8]; + uint8_t d[8]; (void)dst_pixels_per_line; for (row = 0; row < rows; row++) { @@ -215,12 +215,12 @@ static int q2mbl(int x) { return x * x / 3; } -void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch, +void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch, int rows, int cols, int flimit) { int r, c, i; - unsigned char *s = src; - unsigned char d[16]; + uint8_t *s = src; + uint8_t d[16]; for (r = 0; r < rows; r++) { @@ -253,16 +253,16 @@ void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch, } } -void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch, +void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int flimit) { int r, c, i; const short *rv3 = &vp9_rv[63 & rand()]; for (c = 0; c < cols; c++) { - unsigned char *s = &dst[c]; + uint8_t *s = &dst[c]; int sumsq = 0; int sum = 0; - unsigned char d[16]; + uint8_t d[16]; const short *rv2 = rv3 + ((c * 17) & 127); for (i = -8; i <= 6; i++) { @@ -439,7 +439,7 @@ static void fillrd(struct postproc_state *state, int q, int a) { * SPECIAL NOTES : None. * ****************************************************************************/ -void vp9_plane_add_noise_c(unsigned char *Start, char *noise, +void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], @@ -447,7 +447,7 @@ void vp9_plane_add_noise_c(unsigned char *Start, char *noise, unsigned int i, j; for (i = 0; i < Height; i++) { - unsigned char *Pos = Start + i * Pitch; + uint8_t *Pos = Start + i * Pitch; char *Ref = (char *)(noise + (rand() & 0xff)); for (j = 0; j < Width; j++) { @@ -466,7 +466,7 @@ void vp9_plane_add_noise_c(unsigned char *Start, char *noise, * edges unblended to give distinction to macro blocks in areas * filled with the same color block. */ -void vp9_blend_mb_inner_c(unsigned char *y, unsigned char *u, unsigned char *v, +void vp9_blend_mb_inner_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride) { int i, j; int y1_const = y1 * ((1 << 16) - alpha); @@ -499,7 +499,7 @@ void vp9_blend_mb_inner_c(unsigned char *y, unsigned char *u, unsigned char *v, /* Blend only the edge of the macro block. 
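
The blend helpers in vp9_postproc.c premultiply the overlay color by `(1 << 16) - alpha` (the `y1_const` lines above) so each blended pixel needs only one multiply and a shift. A reduced sketch of that 16-bit fixed-point blend; this is my reading of the idiom, not code lifted from the patch:

    #include <stdint.h>

    /* Blend overlay value y1 into a run of pixels, where alpha is the
     * Q16 weight applied to the existing pixel. */
    static void blend_run(uint8_t *y, int n, int y1, int alpha) {
      const int y1_const = y1 * ((1 << 16) - alpha);
      int j;
      for (j = 0; j < n; j++)
        y[j] = (uint8_t)((y[j] * alpha + y1_const) >> 16);
    }
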
Leave center * unblended to allow for other visualizations to be layered. */ -void vp9_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v, +void vp9_blend_mb_outer_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride) { int i, j; int y1_const = y1 * ((1 << 16) - alpha); @@ -554,7 +554,7 @@ void vp9_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v, } } -void vp9_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v, +void vp9_blend_b_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride) { int i, j; int y1_const = y1 * ((1 << 16) - alpha); @@ -688,7 +688,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, if (flags & VP9D_DEBUG_TXT_MBLK_MODES) { int i, j; - unsigned char *y_ptr; + uint8_t *y_ptr; YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int mb_rows = post->y_height >> 4; int mb_cols = post->y_width >> 4; @@ -717,7 +717,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, if (flags & VP9D_DEBUG_TXT_DC_DIFF) { int i, j; - unsigned char *y_ptr; + uint8_t *y_ptr; YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int mb_rows = post->y_height >> 4; int mb_cols = post->y_width >> 4; @@ -764,7 +764,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int width = post->y_width; int height = post->y_height; - unsigned char *y_buffer = oci->post_proc_buffer.y_buffer; + uint8_t *y_buffer = oci->post_proc_buffer.y_buffer; int y_stride = oci->post_proc_buffer.y_stride; MODE_INFO *mi = oci->mi; int x0, y0; @@ -906,9 +906,9 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int width = post->y_width; int height = post->y_height; - unsigned char *y_ptr = oci->post_proc_buffer.y_buffer; - unsigned char *u_ptr = oci->post_proc_buffer.u_buffer; - unsigned char *v_ptr = oci->post_proc_buffer.v_buffer; + uint8_t *y_ptr = oci->post_proc_buffer.y_buffer; + uint8_t *u_ptr = oci->post_proc_buffer.u_buffer; + uint8_t *v_ptr = oci->post_proc_buffer.v_buffer; int y_stride = oci->post_proc_buffer.y_stride; MODE_INFO *mi = oci->mi; @@ -920,7 +920,7 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, ((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag)) { int by, bx; - unsigned char *yl, *ul, *vl; + uint8_t *yl, *ul, *vl; union b_mode_info *bmi = mi->bmi; yl = y_ptr + x; @@ -971,9 +971,9 @@ int vp9_post_proc_frame(VP9_COMMON *oci, YV12_BUFFER_CONFIG *dest, YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int width = post->y_width; int height = post->y_height; - unsigned char *y_ptr = oci->post_proc_buffer.y_buffer; - unsigned char *u_ptr = oci->post_proc_buffer.u_buffer; - unsigned char *v_ptr = oci->post_proc_buffer.v_buffer; + uint8_t *y_ptr = oci->post_proc_buffer.y_buffer; + uint8_t *u_ptr = oci->post_proc_buffer.u_buffer; + uint8_t *v_ptr = oci->post_proc_buffer.v_buffer; int y_stride = oci->post_proc_buffer.y_stride; MODE_INFO *mi = oci->mi; diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h index 0628d84e9..11f55ab0a 100644 --- a/vp9/common/vp9_postproc.h +++ b/vp9/common/vp9_postproc.h @@ -38,4 +38,5 @@ void vp9_deblock(YV12_BUFFER_CONFIG *source, int q, int low_var_thresh, int flag); -#endif + +#endif // VP9_COMMON_VP9_POSTPROC_H_ diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h index 293d8d5cc..561c93028 100644 --- a/vp9/common/vp9_ppflags.h 
+++ b/vp9/common/vp9_ppflags.h @@ -8,9 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_PPFLAGS_H_ #define VP9_COMMON_VP9_PPFLAGS_H_ + enum { VP9D_NOFILTERING = 0, VP9D_DEBLOCK = 1 << 0, @@ -35,4 +35,4 @@ typedef struct { int display_mv_flag; } vp9_ppflags_t; -#endif +#endif // VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/vp9/common/vp9_pragmas.h b/vp9/common/vp9_pragmas.h index 99fee5ae2..cbeaf5370 100644 --- a/vp9/common/vp9_pragmas.h +++ b/vp9/common/vp9_pragmas.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ - - +#ifndef VP9_COMMON_VP9_PRAGMAS_H_ +#define VP9_COMMON_VP9_PRAGMAS_H_ #ifdef __INTEL_COMPILER #pragma warning(disable:997 1011 170) @@ -17,3 +17,5 @@ #ifdef _MSC_VER #pragma warning(disable:4799) #endif + +#endif // VP9_COMMON_VP9_PRAGMAS_H_ diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h index 71b46af64..52c4d42ef 100644 --- a/vp9/common/vp9_pred_common.h +++ b/vp9/common/vp9_pred_common.h @@ -52,4 +52,4 @@ extern MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm, const MACROBLOCKD *const xd); extern void vp9_compute_mod_refprobs(VP9_COMMON *const cm); -#endif /* __INC_PRED_COMMON_H__ */ +#endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h index 6e0555e09..871c2b035 100644 --- a/vp9/common/vp9_quant_common.h +++ b/vp9/common/vp9_quant_common.h @@ -8,15 +8,19 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_ +#define VP9_COMMON_VP9_QUANT_COMMON_H_ #include "string.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" -extern void vp9_init_quant_tables(); +extern void vp9_init_quant_tables(void); extern int vp9_ac_yquant(int QIndex); extern int vp9_dc_quant(int QIndex, int Delta); extern int vp9_dc2quant(int QIndex, int Delta); extern int vp9_ac2quant(int QIndex, int Delta); extern int vp9_dc_uv_quant(int QIndex, int Delta); extern int vp9_ac_uv_quant(int QIndex, int Delta); + +#endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index d8bcd77c0..7673c3727 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -13,13 +13,10 @@ #include "vp9_rtcd.h" #include "vp9/common/vp9_blockd.h" -void vp9_recon_b_c -( - unsigned char *pred_ptr, - short *diff_ptr, - unsigned char *dst_ptr, - int stride -) { +void vp9_recon_b_c(uint8_t *pred_ptr, + int16_t *diff_ptr, + uint8_t *dst_ptr, + int stride) { int r, c; for (r = 0; r < 4; r++) { @@ -33,13 +30,10 @@ void vp9_recon_b_c } } -void vp9_recon_uv_b_c -( - unsigned char *pred_ptr, - short *diff_ptr, - unsigned char *dst_ptr, - int stride -) { +void vp9_recon_uv_b_c(uint8_t *pred_ptr, + int16_t *diff_ptr, + uint8_t *dst_ptr, + int stride) { int r, c; for (r = 0; r < 4; r++) { @@ -52,13 +46,11 @@ void vp9_recon_uv_b_c pred_ptr += 8; } } -void vp9_recon4b_c -( - unsigned char *pred_ptr, - short *diff_ptr, - unsigned char *dst_ptr, - int stride -) { + +void vp9_recon4b_c(uint8_t *pred_ptr, + int16_t *diff_ptr, + uint8_t *dst_ptr, + int stride) { int r, c; for (r = 0; r < 4; r++) { @@ -72,13 +64,10 @@ void vp9_recon4b_c } } -void vp9_recon2b_c -( - unsigned char *pred_ptr, - short *diff_ptr, - unsigned char *dst_ptr, - int stride -) { +void vp9_recon2b_c(uint8_t *pred_ptr, + int16_t *diff_ptr, + uint8_t *dst_ptr, + int stride) { int r, c; for (r = 0; r < 4; r++) { @@ -97,7 +86,7 @@ void vp9_recon_mby_s_c(MACROBLOCKD *xd, uint8_t 
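
Several headers in this patch gain a full `VP9_COMMON_VP9_*_H_` guard, replacing `__INC_*` names (identifiers with a leading double underscore are reserved in C) or adding guards where none existed (vp9_pragmas.h, vp9_quant_common.h). The convention, sketched on a hypothetical header that is not part of the tree:

    /* vp9/common/vp9_example.h -- hypothetical file illustrating the
     * path-derived include-guard style used throughout this change. */
    #ifndef VP9_COMMON_VP9_EXAMPLE_H_
    #define VP9_COMMON_VP9_EXAMPLE_H_

    void vp9_example(void);

    #endif  // VP9_COMMON_VP9_EXAMPLE_H_
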
*dst) { int x, y; BLOCKD *b = &xd->block[0]; int stride = b->dst_stride; - short *diff = b->diff; + int16_t *diff = b->diff; for (y = 0; y < 16; y++) { for (x = 0; x < 16; x++) { @@ -115,7 +104,7 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { for (i = 0; i < 2; i++, dst = vdst) { BLOCKD *b = &xd->block[16 + 4 * i]; int stride = b->dst_stride; - short *diff = b->diff; + int16_t *diff = b->diff; for (y = 0; y < 8; y++) { for (x = 0; x < 8; x++) { @@ -130,7 +119,7 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { #if CONFIG_TX32X32 void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { int x, y, stride = xd->block[0].dst_stride; - short *diff = xd->sb_coeff_data.diff; + int16_t *diff = xd->sb_coeff_data.diff; for (y = 0; y < 32; y++) { for (x = 0; x < 32; x++) { @@ -143,8 +132,8 @@ void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { int x, y, stride = xd->block[16].dst_stride; - short *udiff = xd->sb_coeff_data.diff + 1024; - short *vdiff = xd->sb_coeff_data.diff + 1280; + int16_t *udiff = xd->sb_coeff_data.diff + 1024; + int16_t *vdiff = xd->sb_coeff_data.diff + 1280; for (y = 0; y < 16; y++) { for (x = 0; x < 16; x++) { diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 11d1c97b1..01d332f79 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -42,8 +42,7 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, xd->subpixel_predict_avg = vp9_eighttap_predict_avg4x4_sharp; xd->subpixel_predict_avg8x8 = vp9_eighttap_predict_avg8x8_sharp; xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16_sharp_c; - } - else { + } else { xd->subpixel_predict = vp9_bilinear_predict4x4; xd->subpixel_predict8x4 = vp9_bilinear_predict8x4; xd->subpixel_predict8x8 = vp9_bilinear_predict8x8; @@ -54,9 +53,9 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd, } } -void vp9_copy_mem16x16_c(unsigned char *src, +void vp9_copy_mem16x16_c(uint8_t *src, int src_stride, - unsigned char *dst, + uint8_t *dst, int dst_stride) { int r; @@ -91,9 +90,9 @@ void vp9_copy_mem16x16_c(unsigned char *src, } } -void vp9_avg_mem16x16_c(unsigned char *src, +void vp9_avg_mem16x16_c(uint8_t *src, int src_stride, - unsigned char *dst, + uint8_t *dst, int dst_stride) { int r; @@ -109,9 +108,9 @@ void vp9_avg_mem16x16_c(unsigned char *src, } } -void vp9_copy_mem8x8_c(unsigned char *src, +void vp9_copy_mem8x8_c(uint8_t *src, int src_stride, - unsigned char *dst, + uint8_t *dst, int dst_stride) { int r; @@ -134,9 +133,9 @@ void vp9_copy_mem8x8_c(unsigned char *src, } } -void vp9_avg_mem8x8_c(unsigned char *src, +void vp9_avg_mem8x8_c(uint8_t *src, int src_stride, - unsigned char *dst, + uint8_t *dst, int dst_stride) { int r; @@ -152,9 +151,9 @@ void vp9_avg_mem8x8_c(unsigned char *src, } } -void vp9_copy_mem8x4_c(unsigned char *src, +void vp9_copy_mem8x4_c(uint8_t *src, int src_stride, - unsigned char *dst, + uint8_t *dst, int dst_stride) { int r; @@ -179,9 +178,9 @@ void vp9_copy_mem8x4_c(unsigned char *src, void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, vp9_subpix_fn_t sppf) { int r; - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + uint8_t *ptr_base; + uint8_t *ptr; + uint8_t *pred_ptr = d->predictor; int_mv mv; ptr_base = *(d->base_pre); @@ -221,9 +220,9 @@ void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, vp9_subpix_fn_t sppf) { void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch, vp9_subpix_fn_t 
sppf) { int r; - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + uint8_t *ptr_base; + uint8_t *ptr; + uint8_t *pred_ptr = d->predictor; int_mv mv; ptr_base = *(d->base_second_pre); @@ -251,9 +250,9 @@ void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch, } void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + uint8_t *ptr_base; + uint8_t *ptr; + uint8_t *pred_ptr = d->predictor; int_mv mv; ptr_base = *(d->base_pre); @@ -277,9 +276,9 @@ void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { */ void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + uint8_t *ptr_base; + uint8_t *ptr; + uint8_t *pred_ptr = d->predictor; int_mv mv; ptr_base = *(d->base_second_pre); @@ -296,9 +295,9 @@ void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd, } static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) { - unsigned char *ptr_base; - unsigned char *ptr; - unsigned char *pred_ptr = d->predictor; + uint8_t *ptr_base; + uint8_t *ptr; + uint8_t *pred_ptr = d->predictor; int_mv mv; ptr_base = *(d->base_pre); @@ -328,13 +327,13 @@ static const int pred_filter[PRED_FILT_LEN] = {1, 2, 10, 2, 1}; // Alternative filter {1, 1, 4, 1, 1} #if !USE_THRESH_FILTER -void filter_mb(unsigned char *src, int src_stride, - unsigned char *dst, int dst_stride, +void filter_mb(uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int width, int height) { int i, j, k; unsigned int temp[32 * 32]; unsigned int *pTmp = temp; - unsigned char *pSrc = src - (1 + src_stride) * (PRED_FILT_LEN / 2); + uint8_t *pSrc = src - (1 + src_stride) * (PRED_FILT_LEN / 2); // Horizontal for (i = 0; i < height + PRED_FILT_LEN - 1; i++) { @@ -352,7 +351,7 @@ void filter_mb(unsigned char *src, int src_stride, // Vertical pTmp = temp; for (i = 0; i < width; i++) { - unsigned char *pDst = dst + i; + uint8_t *pDst = dst + i; for (j = 0; j < height; j++) { int sum = 0; for (k = 0; k < PRED_FILT_LEN; k++) @@ -366,15 +365,15 @@ void filter_mb(unsigned char *src, int src_stride, } #else // Based on vp9_post_proc_down_and_across_c (vp9_postproc.c) -void filter_mb(unsigned char *src, int src_stride, - unsigned char *dst, int dst_stride, +void filter_mb(uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int width, int height) { - unsigned char *pSrc, *pDst; + uint8_t *pSrc, *pDst; int row; int col; int i; int v; - unsigned char d[8]; + uint8_t d[8]; /* TODO flimit should be linked to the quantizer value */ int flimit = 7; @@ -573,11 +572,11 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { /*encoder only*/ void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd, - unsigned char *dst_y, + uint8_t *dst_y, int dst_ystride, int clamp_mvs) { - unsigned char *ptr_base = xd->pre.y_buffer; - unsigned char *ptr; + uint8_t *ptr_base = xd->pre.y_buffer; + uint8_t *ptr; int pre_stride = xd->block[0].pre_stride; int_mv ymv; @@ -593,8 +592,8 @@ void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd, if ((ymv.as_mv.row | ymv.as_mv.col) & 7) { // Sub-pel filter needs extended input int len = 15 + (VP9_INTERP_EXTEND << 1); - unsigned char Temp[32 * 32]; // Data required by sub-pel filter - unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); + uint8_t Temp[32 * 32]; // Data required by sub-pel 
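
When the motion vector has a fractional part, the prediction code above first copies an extended region into `Temp` so the spatial pre-filter and the sub-pel interpolator have the border pixels they need; the extension is `VP9_INTERP_EXTEND` on each side. A sketch of the size arithmetic only; `EXTEND` and its value are stand-ins, since the macro's definition is not shown in this patch:

    #include <stdint.h>

    enum { EXTEND = 4 };  /* stand-in for VP9_INTERP_EXTEND */

    /* Edge length of the bordered copy needed to interpolate a
     * block_size-wide block: (block_size - 1) plus EXTEND on each side,
     * matching len = 15 + (VP9_INTERP_EXTEND << 1) for 16-wide blocks. */
    static int extended_len(int block_size) {
      return (block_size - 1) + (EXTEND << 1);
    }
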
filter + uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); // Copy extended MB into Temp array, applying the spatial filter filter_mb(ptr - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride, @@ -622,11 +621,11 @@ void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd, } void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_u, + uint8_t *dst_v, int dst_uvstride) { int offset; - unsigned char *uptr, *vptr; + uint8_t *uptr, *vptr; int pre_stride = xd->block[0].pre_stride; int_mv _o16x16mv; int_mv _16x16mv; @@ -662,11 +661,11 @@ void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, #if CONFIG_PRED_FILTER if (xd->mode_info_context->mbmi.pred_filter_enabled) { int i; - unsigned char *pSrc = uptr; - unsigned char *pDst = dst_u; + uint8_t *pSrc = uptr; + uint8_t *pDst = dst_u; int len = 7 + (VP9_INTERP_EXTEND << 1); - unsigned char Temp[32 * 32]; // Data required by the sub-pel filter - unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); + uint8_t Temp[32 * 32]; // Data required by the sub-pel filter + uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); // U & V for (i = 0; i < 2; i++) { @@ -703,9 +702,9 @@ void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, int dst_ystride, int dst_uvstride) { vp9_build_1st_inter16x16_predictors_mby(xd, dst_y, dst_ystride, xd->mode_info_context->mbmi.need_to_clamp_mvs); @@ -714,9 +713,9 @@ void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, #if CONFIG_SUPERBLOCKS void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, int dst_ystride, int dst_uvstride) { uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; @@ -798,15 +797,15 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, * single reference framer. 
*/ void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd, - unsigned char *dst_y, + uint8_t *dst_y, int dst_ystride) { - unsigned char *ptr; + uint8_t *ptr; int_mv _16x16mv; int mv_row; int mv_col; - unsigned char *ptr_base = xd->second_pre.y_buffer; + uint8_t *ptr_base = xd->second_pre.y_buffer; int pre_stride = xd->block[0].pre_stride; _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int; @@ -824,8 +823,8 @@ void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd, if ((mv_row | mv_col) & 7) { // Sub-pel filter needs extended input int len = 15 + (VP9_INTERP_EXTEND << 1); - unsigned char Temp[32 * 32]; // Data required by sub-pel filter - unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); + uint8_t Temp[32 * 32]; // Data required by sub-pel filter + uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); // Copy extended MB into Temp array, applying the spatial filter filter_mb(ptr - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride, @@ -852,11 +851,11 @@ void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd, } void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_u, + uint8_t *dst_v, int dst_uvstride) { int offset; - unsigned char *uptr, *vptr; + uint8_t *uptr, *vptr; int_mv _16x16mv; int mv_row; @@ -891,10 +890,10 @@ void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd, if (xd->mode_info_context->mbmi.pred_filter_enabled) { int i; int len = 7 + (VP9_INTERP_EXTEND << 1); - unsigned char Temp[32 * 32]; // Data required by sub-pel filter - unsigned char *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); - unsigned char *pSrc = uptr; - unsigned char *pDst = dst_u; + uint8_t Temp[32 * 32]; // Data required by sub-pel filter + uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1); + uint8_t *pSrc = uptr; + uint8_t *pDst = dst_u; // U & V for (i = 0; i < 2; i++) { @@ -930,9 +929,9 @@ void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd, } void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, int dst_ystride, int dst_uvstride) { vp9_build_2nd_inter16x16_predictors_mby(xd, dst_y, dst_ystride); diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index f3292f148..b104f835d 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -11,46 +11,47 @@ #ifndef VP9_COMMON_VP9_RECONINTER_H_ #define VP9_COMMON_VP9_RECONINTER_H_ +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_onyxc_int.h" extern void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd, - unsigned char *dst_y, + uint8_t *dst_y, int dst_ystride, int clamp_mvs); extern void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_u, + uint8_t *dst_v, int dst_uvstride); extern void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, int dst_ystride, int dst_uvstride); extern void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd, - unsigned char *dst_y, + uint8_t *dst_y, int dst_ystride); extern void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_u, + uint8_t *dst_v, int dst_uvstride); extern void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD 
*xd, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, int dst_ystride, int dst_uvstride); #if CONFIG_SUPERBLOCKS extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, int dst_ystride, int dst_uvstride); #endif @@ -75,4 +76,4 @@ extern void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERPOLATIONFILTERTYPE filter, VP9_COMMON *cm); -#endif // __INC_RECONINTER_H +#endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 14f94820d..b893df151 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -198,9 +198,9 @@ static void d153_predictor(uint8_t *ypred_ptr, int y_stride, int n, } } -static void corner_predictor(unsigned char *ypred_ptr, int y_stride, int n, - unsigned char *yabove_row, - unsigned char *yleft_col) { +static void corner_predictor(uint8_t *ypred_ptr, int y_stride, int n, + uint8_t *yabove_row, + uint8_t *yleft_col) { int mh, mv, maxgradh, maxgradv, x, y, nx, ny; int i, j; int top_left = yabove_row[-1]; @@ -248,14 +248,14 @@ void vp9_recon_intra_mbuv(MACROBLOCKD *xd) { } } -void vp9_build_intra_predictors_internal(unsigned char *src, int src_stride, - unsigned char *ypred_ptr, +void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride, + uint8_t *ypred_ptr, int y_stride, int mode, int bsize, int up_available, int left_available) { - unsigned char *yabove_row = src - src_stride; - unsigned char yleft_col[32]; - unsigned char ytop_left = yabove_row[-1]; + uint8_t *yabove_row = src - src_stride; + uint8_t yleft_col[32]; + uint8_t ytop_left = yabove_row[-1]; int r, c, i; for (i = 0; i < bsize; i++) { @@ -368,9 +368,9 @@ void vp9_build_intra_predictors_internal(unsigned char *src, int src_stride, #if CONFIG_COMP_INTERINTRA_PRED static void combine_interintra(MB_PREDICTION_MODE mode, - unsigned char *interpred, + uint8_t *interpred, int interstride, - unsigned char *intrapred, + uint8_t *intrapred, int intrastride, int size) { // TODO(debargha): Explore different ways of combining predictors @@ -607,18 +607,18 @@ static void combine_interintra(MB_PREDICTION_MODE mode, } void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, - unsigned char *ypred, - unsigned char *upred, - unsigned char *vpred, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, int ystride, int uvstride) { vp9_build_interintra_16x16_predictors_mby(xd, ypred, ystride); vp9_build_interintra_16x16_predictors_mbuv(xd, upred, vpred, uvstride); } void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, - unsigned char *ypred, + uint8_t *ypred, int ystride) { - unsigned char intrapredictor[256]; + uint8_t intrapredictor[256]; vp9_build_intra_predictors_internal( xd->dst.y_buffer, xd->dst.y_stride, intrapredictor, 16, @@ -629,11 +629,11 @@ void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, } void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, - unsigned char *upred, - unsigned char *vpred, + uint8_t *upred, + uint8_t *vpred, int uvstride) { - unsigned char uintrapredictor[64]; - unsigned char vintrapredictor[64]; + uint8_t uintrapredictor[64]; + uint8_t vintrapredictor[64]; vp9_build_intra_predictors_internal( xd->dst.u_buffer, xd->dst.uv_stride, uintrapredictor, 8, @@ -652,9 +652,9 @@ void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, #if CONFIG_SUPERBLOCKS void 
vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd, - unsigned char *ypred, + uint8_t *ypred, int ystride) { - unsigned char intrapredictor[1024]; + uint8_t intrapredictor[1024]; vp9_build_intra_predictors_internal( xd->dst.y_buffer, xd->dst.y_stride, intrapredictor, 32, @@ -665,11 +665,11 @@ void vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd, } void vp9_build_interintra_32x32_predictors_sbuv(MACROBLOCKD *xd, - unsigned char *upred, - unsigned char *vpred, + uint8_t *upred, + uint8_t *vpred, int uvstride) { - unsigned char uintrapredictor[256]; - unsigned char vintrapredictor[256]; + uint8_t uintrapredictor[256]; + uint8_t vintrapredictor[256]; vp9_build_intra_predictors_internal( xd->dst.u_buffer, xd->dst.uv_stride, uintrapredictor, 16, @@ -687,9 +687,9 @@ void vp9_build_interintra_32x32_predictors_sbuv(MACROBLOCKD *xd, } void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, - unsigned char *ypred, - unsigned char *upred, - unsigned char *vpred, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, int ystride, int uvstride) { vp9_build_interintra_32x32_predictors_sby(xd, ypred, ystride); @@ -723,7 +723,7 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { #if CONFIG_COMP_INTRA_PRED void vp9_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { - unsigned char predictor[2][256]; + uint8_t predictor[2][256]; int i; vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, @@ -744,8 +744,8 @@ void vp9_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { #endif void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, - unsigned char *upred_ptr, - unsigned char *vpred_ptr, + uint8_t *upred_ptr, + uint8_t *vpred_ptr, int uv_stride, int mode, int bsize) { vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, @@ -782,7 +782,7 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { #if CONFIG_COMP_INTRA_PRED void vp9_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { - unsigned char predictor[2][2][64]; + uint8_t predictor[2][2][64]; int i; vp9_build_intra_predictors_mbuv_internal( @@ -801,7 +801,7 @@ void vp9_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { void vp9_intra8x8_predict(BLOCKD *xd, int mode, - unsigned char *predictor) { + uint8_t *predictor) { vp9_build_intra_predictors_internal(*(xd->base_dst) + xd->dst, xd->dst_stride, predictor, 16, mode, 8, 1, 1); @@ -810,8 +810,8 @@ void vp9_intra8x8_predict(BLOCKD *xd, #if CONFIG_COMP_INTRA_PRED void vp9_comp_intra8x8_predict(BLOCKD *xd, int mode, int second_mode, - unsigned char *out_predictor) { - unsigned char predictor[2][8 * 16]; + uint8_t *out_predictor) { + uint8_t predictor[2][8 * 16]; int i, j; vp9_intra8x8_predict(xd, mode, predictor[0]); @@ -827,7 +827,7 @@ void vp9_comp_intra8x8_predict(BLOCKD *xd, void vp9_intra_uv4x4_predict(BLOCKD *xd, int mode, - unsigned char *predictor) { + uint8_t *predictor) { vp9_build_intra_predictors_internal(*(xd->base_dst) + xd->dst, xd->dst_stride, predictor, 8, mode, 4, 1, 1); @@ -836,8 +836,8 @@ void vp9_intra_uv4x4_predict(BLOCKD *xd, #if CONFIG_COMP_INTRA_PRED void vp9_comp_intra_uv4x4_predict(BLOCKD *xd, int mode, int mode2, - unsigned char *out_predictor) { - unsigned char predictor[2][8 * 4]; + uint8_t *out_predictor) { + uint8_t predictor[2][8 * 4]; int i, j; vp9_intra_uv4x4_predict(xd, mode, predictor[0]); diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index 92882d318..f3016dd79 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -11,34 +11,36 @@ #ifndef 
VP9_COMMON_VP9_RECONINTRA_H_ #define VP9_COMMON_VP9_RECONINTRA_H_ +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd); -extern B_PREDICTION_MODE vp9_find_dominant_direction(unsigned char *ptr, +extern B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, int stride, int n); extern B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x); #if CONFIG_COMP_INTERINTRA_PRED extern void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd, - unsigned char *ypred, - unsigned char *upred, - unsigned char *vpred, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, int ystride, int uvstride); extern void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd, - unsigned char *ypred, + uint8_t *ypred, int ystride); extern void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, - unsigned char *upred, - unsigned char *vpred, + uint8_t *upred, + uint8_t *vpred, int uvstride); +#endif // CONFIG_COMP_INTERINTRA_PRED + #if CONFIG_SUPERBLOCKS extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, - unsigned char *ypred, - unsigned char *upred, - unsigned char *vpred, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, int ystride, int uvstride); -#endif -#endif +#endif // CONFIG_SUPERBLOCKS -#endif // __INC_RECONINTRA_H +#endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index 82338901a..d61a515b8 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -15,7 +15,7 @@ #include "vp9_rtcd.h" #if CONFIG_NEWBINTRAMODES -static int find_grad_measure(unsigned char *x, int stride, int n, int t, +static int find_grad_measure(uint8_t *x, int stride, int n, int t, int dx, int dy) { int i, j; int count = 0, gsum = 0, gdiv; @@ -35,8 +35,8 @@ static int find_grad_measure(unsigned char *x, int stride, int n, int t, } #if CONTEXT_PRED_REPLACEMENTS == 6 -B_PREDICTION_MODE vp9_find_dominant_direction( - unsigned char *ptr, int stride, int n) { +B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, + int stride, int n) { int g[8], i, imin, imax; g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1); g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1); @@ -72,8 +72,8 @@ B_PREDICTION_MODE vp9_find_dominant_direction( } } #elif CONTEXT_PRED_REPLACEMENTS == 4 -B_PREDICTION_MODE vp9_find_dominant_direction( - unsigned char *ptr, int stride, int n) { +B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, + int stride, int n) { int g[8], i, imin, imax; g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1); g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2); @@ -103,8 +103,8 @@ B_PREDICTION_MODE vp9_find_dominant_direction( } } #elif CONTEXT_PRED_REPLACEMENTS == 0 -B_PREDICTION_MODE vp9_find_dominant_direction( - unsigned char *ptr, int stride, int n) { +B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr, + int stride, int n) { int g[8], i, imin, imax; g[0] = find_grad_measure(ptr, stride, n, 4, 1, 0); g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1); @@ -145,7 +145,7 @@ B_PREDICTION_MODE vp9_find_dominant_direction( #endif B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x) { - unsigned char *ptr = *(x->base_dst) + x->dst; + uint8_t *ptr = *(x->base_dst) + x->dst; int stride = x->dst_stride; return vp9_find_dominant_direction(ptr, stride, 4); } @@ -153,12 +153,12 @@ B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x) { void vp9_intra4x4_predict(BLOCKD *x, int b_mode, - unsigned char *predictor) { + uint8_t *predictor) { int i, r, c; 
- unsigned char *above = *(x->base_dst) + x->dst - x->dst_stride; - unsigned char left[4]; - unsigned char top_left = above[-1]; + uint8_t *above = *(x->base_dst) + x->dst - x->dst_stride; + uint8_t left[4]; + uint8_t top_left = above[-1]; left[0] = (*(x->base_dst))[x->dst - 1]; left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride]; @@ -203,8 +203,8 @@ void vp9_intra4x4_predict(BLOCKD *x, break; case B_VE_PRED: { - unsigned int ap[4]; + ap[0] = above[0]; ap[1] = above[1]; ap[2] = above[2]; @@ -212,20 +212,17 @@ void vp9_intra4x4_predict(BLOCKD *x, for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) { - predictor[c] = ap[c]; } predictor += 16; } - } break; - case B_HE_PRED: { - unsigned int lp[4]; + lp[0] = left[0]; lp[1] = left[1]; lp[2] = left[2]; @@ -241,7 +238,8 @@ void vp9_intra4x4_predict(BLOCKD *x, } break; case B_LD_PRED: { - unsigned char *ptr = above; + uint8_t *ptr = above; + predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; predictor[0 * 16 + 1] = predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; @@ -262,8 +260,7 @@ void vp9_intra4x4_predict(BLOCKD *x, } break; case B_RD_PRED: { - - unsigned char pp[9]; + uint8_t pp[9]; pp[0] = left[3]; pp[1] = left[2]; @@ -295,8 +292,7 @@ void vp9_intra4x4_predict(BLOCKD *x, } break; case B_VR_PRED: { - - unsigned char pp[9]; + uint8_t pp[9]; pp[0] = left[3]; pp[1] = left[2]; @@ -308,7 +304,6 @@ void vp9_intra4x4_predict(BLOCKD *x, pp[7] = above[2]; pp[8] = above[3]; - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; predictor[3 * 16 + 1] = @@ -329,8 +324,7 @@ void vp9_intra4x4_predict(BLOCKD *x, } break; case B_VL_PRED: { - - unsigned char *pp = above; + uint8_t *pp = above; predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; @@ -352,7 +346,8 @@ void vp9_intra4x4_predict(BLOCKD *x, break; case B_HD_PRED: { - unsigned char pp[9]; + uint8_t pp[9]; + pp[0] = left[3]; pp[1] = left[2]; pp[2] = left[1]; @@ -385,7 +380,7 @@ void vp9_intra4x4_predict(BLOCKD *x, case B_HU_PRED: { - unsigned char *pp = left; + uint8_t *pp = left; predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; predictor[0 * 16 + 2] = @@ -419,9 +414,9 @@ void vp9_intra4x4_predict(BLOCKD *x, #if CONFIG_COMP_INTRA_PRED void vp9_comp_intra4x4_predict_c(BLOCKD *x, - int b_mode, int b_mode2, - unsigned char *out_predictor) { - unsigned char predictor[2][4 * 16]; + int b_mode, int b_mode2, + uint8_t *out_predictor) { + uint8_t predictor[2][4 * 16]; int i, j; vp9_intra4x4_predict(x, b_mode, predictor[0]); @@ -440,18 +435,18 @@ void vp9_comp_intra4x4_predict_c(BLOCKD *x, */ void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) { int extend_edge = (xd->mb_to_right_edge == 0 && xd->mb_index < 2); - unsigned char *above_right = *(xd->block[0].base_dst) + xd->block[0].dst - + uint8_t *above_right = *(xd->block[0].base_dst) + xd->block[0].dst - xd->block[0].dst_stride + 16; - unsigned int *src_ptr = (unsigned int *) + uint32_t *src_ptr = (uint32_t *) (above_right - (xd->mb_index == 3 ? 
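
The directional 4x4 intra predictors above lean on two primitives: a rounded (1, 2, 1) tap average written `(a + 2 * b + c + 2) >> 2` and a rounded two-tap average `(a + b + 1) >> 1`. Pulled out as helpers for clarity; these wrappers are illustrative, not part of the patch:

    #include <stdint.h>

    /* Rounded (1,2,1) average used by the diagonal predictors
     * (B_LD_PRED, B_RD_PRED, B_VR_PRED, ...). */
    static uint8_t avg3(uint8_t a, uint8_t b, uint8_t c) {
      return (uint8_t)((a + 2 * b + c + 2) >> 2);
    }

    /* Rounded two-tap average, e.g. (pp[0] + pp[1] + 1) >> 1 in B_VL_PRED. */
    static uint8_t avg2(uint8_t a, uint8_t b) {
      return (uint8_t)((a + b + 1) >> 1);
    }
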
16 * xd->block[0].dst_stride : 0)); - unsigned int *dst_ptr0 = (unsigned int *)above_right; - unsigned int *dst_ptr1 = - (unsigned int *)(above_right + 4 * xd->block[0].dst_stride); - unsigned int *dst_ptr2 = - (unsigned int *)(above_right + 8 * xd->block[0].dst_stride); - unsigned int *dst_ptr3 = - (unsigned int *)(above_right + 12 * xd->block[0].dst_stride); + uint32_t *dst_ptr0 = (uint32_t *)above_right; + uint32_t *dst_ptr1 = + (uint32_t *)(above_right + 4 * xd->block[0].dst_stride); + uint32_t *dst_ptr2 = + (uint32_t *)(above_right + 8 * xd->block[0].dst_stride); + uint32_t *dst_ptr3 = + (uint32_t *)(above_right + 12 * xd->block[0].dst_stride); if (extend_edge) { *src_ptr = ((uint8_t *) src_ptr)[-1] * 0x01010101U; diff --git a/vp9/common/vp9_reconintra4x4.h b/vp9/common/vp9_reconintra4x4.h index 8e806bcb7..4e58731e8 100644 --- a/vp9/common/vp9_reconintra4x4.h +++ b/vp9/common/vp9_reconintra4x4.h @@ -14,4 +14,4 @@ extern void vp9_intra_prediction_down_copy(MACROBLOCKD *xd); -#endif +#endif // VP9_COMMON_VP9_RECONINTRA4X4_H_ diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 49a3a8595..44781cb8a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -4,6 +4,8 @@ cat <y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); for (i = 0; i < ybf->y_height; i++) - ybf->y_buffer[ybf->y_stride * i - 1] = (unsigned char) 129; + ybf->y_buffer[ybf->y_stride * i - 1] = (uint8_t) 129; vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); for (i = 0; i < ybf->uv_height; i++) - ybf->u_buffer[ybf->uv_stride * i - 1] = (unsigned char) 129; + ybf->u_buffer[ybf->uv_stride * i - 1] = (uint8_t) 129; vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); for (i = 0; i < ybf->uv_height; i++) - ybf->v_buffer[ybf->uv_stride * i - 1] = (unsigned char) 129; + ybf->v_buffer[ybf->uv_stride * i - 1] = (uint8_t) 129; } diff --git a/vp9/common/vp9_setupintrarecon.h b/vp9/common/vp9_setupintrarecon.h index 1a55d0ad6..457265528 100644 --- a/vp9/common/vp9_setupintrarecon.h +++ b/vp9/common/vp9_setupintrarecon.h @@ -8,6 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_COMMON_VP9_SETUPINTRARECON_H_ +#define VP9_COMMON_VP9_SETUPINTRARECON_H_ #include "vpx_scale/yv12config.h" + extern void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); + +#endif // VP9_COMMON_VP9_SETUPINTRARECON_H_ diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h index 79fed5571..ad674f105 100644 --- a/vp9/common/vp9_subpelvar.h +++ b/vp9/common/vp9_subpelvar.h @@ -8,14 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_COMMON_VP9_SUBPELVAR_H_ +#define VP9_COMMON_VP9_SUBPELVAR_H_ #include "vp9/common/vp9_filter.h" - - -static void variance(const unsigned char *src_ptr, +static void variance(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, int w, int h, @@ -67,13 +67,13 @@ static void variance(const unsigned char *src_ptr, * to the next. 
* ****************************************************************************/ -static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr, - unsigned short *output_ptr, +static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, + uint16_t *output_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, unsigned int output_width, - const short *vp9_filter) { + const int16_t *vp9_filter) { unsigned int i, j; for (i = 0; i < output_height; i++) { @@ -119,13 +119,13 @@ static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr, * to the next. * ****************************************************************************/ -static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr, - unsigned char *output_ptr, +static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, + uint8_t *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, - const short *vp9_filter) { + const int16_t *vp9_filter) { unsigned int i, j; int Temp; @@ -145,3 +145,4 @@ static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr, } } +#endif // VP9_COMMON_VP9_SUBPELVAR_H_ diff --git a/vp9/common/vp9_subpixel.h b/vp9/common/vp9_subpixel.h index 5824e1aa9..dc4eadfb1 100644 --- a/vp9/common/vp9_subpixel.h +++ b/vp9/common/vp9_subpixel.h @@ -8,14 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_SUBPIXEL_H_ #define VP9_COMMON_VP9_SUBPIXEL_H_ #define prototype_subpixel_predict(sym) \ - void sym(unsigned char *src, int src_pitch, int xofst, int yofst, \ - unsigned char *dst, int dst_pitch) + void sym(uint8_t *src, int src_pitch, int xofst, int yofst, \ + uint8_t *dst, int dst_pitch) typedef prototype_subpixel_predict((*vp9_subpix_fn_t)); -#endif +#endif // VP9_COMMON_VP9_SUBPIXEL_H_ diff --git a/vp9/common/vp9_swapyv12buffer.c b/vp9/common/vp9_swapyv12buffer.c index b01462538..10c6b4171 100644 --- a/vp9/common/vp9_swapyv12buffer.c +++ b/vp9/common/vp9_swapyv12buffer.c @@ -12,7 +12,7 @@ void vp9_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame) { - unsigned char *temp; + uint8_t *temp; temp = last_frame->buffer_alloc; last_frame->buffer_alloc = new_frame->buffer_alloc; diff --git a/vp9/common/vp9_swapyv12buffer.h b/vp9/common/vp9_swapyv12buffer.h index 43001763a..2e112069a 100644 --- a/vp9/common/vp9_swapyv12buffer.h +++ b/vp9/common/vp9_swapyv12buffer.h @@ -16,4 +16,4 @@ void vp9_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame); -#endif // __SWAPYV12_BUFFER_H +#endif // VP9_COMMON_VP9_SWAPYV12BUFFER_H_ diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h index 5bc6237e6..d57a42df3 100644 --- a/vp9/common/vp9_systemdependent.h +++ b/vp9/common/vp9_systemdependent.h @@ -7,6 +7,7 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ + #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ @@ -34,4 +35,5 @@ static int round(double x) { struct VP9Common; void vp9_machine_specific_config(struct VP9Common *); -#endif + +#endif // VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ diff --git a/vp9/common/vp9_textblit.h b/vp9/common/vp9_textblit.h index 81bfa253e..8285aa7fd 100644 --- a/vp9/common/vp9_textblit.h +++ b/vp9/common/vp9_textblit.h @@ -16,4 +16,4 @@ extern void vp9_blit_text(const char *msg, unsigned char *address, extern void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); -#endif // __INC_TEXTBLIT_H +#endif // VP9_COMMON_VP9_TEXTBLIT_H_ diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h index 525b77dc8..0c0c5e96e 100644 --- a/vp9/common/vp9_treecoder.h +++ b/vp9/common/vp9_treecoder.h @@ -8,31 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_TREECODER_H_ #define VP9_COMMON_VP9_TREECODER_H_ #include "vpx/vpx_integer.h" -typedef unsigned char vp9_prob; +typedef uint8_t vp9_prob; -#define vp9_prob_half ( (vp9_prob) 128) +#define vp9_prob_half ((vp9_prob) 128) -typedef signed char vp9_tree_index; -struct bool_coder_spec; - -typedef struct bool_coder_spec bool_coder_spec; -typedef struct bool_writer bool_writer; -typedef struct bool_reader bool_reader; - -typedef const bool_coder_spec c_bool_coder_spec; -typedef const bool_writer c_bool_writer; -typedef const bool_reader c_bool_reader; - - - -# define vp9_complement( x) (255 - x) +typedef int8_t vp9_tree_index; +#define vp9_complement(x) (255 - x) /* We build coding trees compactly in arrays. Each node of the tree is a pair of vp9_tree_indices. @@ -43,7 +30,6 @@ typedef const bool_reader c_bool_reader; typedef const vp9_tree_index vp9_tree[], *vp9_tree_p; - typedef const struct vp9_token_struct { int value; int Len; @@ -55,20 +41,17 @@ void vp9_tokens_from_tree(struct vp9_token_struct *, vp9_tree); void vp9_tokens_from_tree_offset(struct vp9_token_struct *, vp9_tree, int offset); - /* Convert array of token occurrence counts into a table of probabilities for the associated binary encoding tree. Also writes count of branches taken for each node on the tree; this facilitiates decisions as to probability updates. */ -void vp9_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp9_token tok [ /* n */ ], - vp9_tree tree, - vp9_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ] -); +void vp9_tree_probs_from_distribution(int n, /* n = size of alphabet */ + vp9_token tok[ /* n */ ], + vp9_tree tree, + vp9_prob probs[ /* n - 1 */ ], + unsigned int branch_ct[ /* n - 1 */ ][2], + const unsigned int num_events[ /* n */ ]); static __inline vp9_prob clip_prob(int p) { return (p > 255) ? 255u : (p < 1) ? 1u : p; @@ -87,4 +70,4 @@ static __inline vp9_prob weighted_prob(int prob1, int prob2, int factor) { return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8; } -#endif +#endif // VP9_COMMON_VP9_TREECODER_H_ diff --git a/vp9/decoder/vp9_dboolhuff.h b/vp9/decoder/vp9_dboolhuff.h index a1c0c7956..c8c5c3b01 100644 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@ -8,9 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_DECODER_VP9_DBOOLHUFF_H_ #define VP9_DECODER_VP9_DBOOLHUFF_H_ + #include <stddef.h> #include <limits.h> #include "vpx_ports/config.h" @@ -33,7 +33,7 @@ typedef struct { unsigned int range; } BOOL_DECODER; -DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); int vp9_start_decode(BOOL_DECODER *br, const unsigned char *source, @@ -152,4 +152,4 @@ static int bool_error(BOOL_DECODER *br) { extern int vp9_decode_unsigned_max(BOOL_DECODER *br, int max); -#endif +#endif // VP9_DECODER_VP9_DBOOLHUFF_H_ diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h index a5c44670c..5cd935760 100644 --- a/vp9/decoder/vp9_decodemv.h +++ b/vp9/decoder/vp9_decodemv.h @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_DECODER_VP9_DECODEMV_H_ +#define VP9_DECODER_VP9_DECODEMV_H_ #include "vp9/decoder/vp9_onyxd_int.h" @@ -17,3 +19,5 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, int mb_col, BOOL_DECODER* const bc); void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc); + +#endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 8d9f77b77..c3a17eff0 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -82,17 +82,17 @@ void vp9_init_de_quantizer(VP9D_COMP *pbi) { VP9_COMMON *const pc = &pbi->common; for (Q = 0; Q < QINDEX_RANGE; Q++) { - pc->Y1dequant[Q][0] = (short)vp9_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0] = (short)vp9_dc2quant(Q, pc->y2dc_delta_q); - pc->UVdequant[Q][0] = (short)vp9_dc_uv_quant(Q, pc->uvdc_delta_q); + pc->Y1dequant[Q][0] = (int16_t)vp9_dc_quant(Q, pc->y1dc_delta_q); + pc->Y2dequant[Q][0] = (int16_t)vp9_dc2quant(Q, pc->y2dc_delta_q); + pc->UVdequant[Q][0] = (int16_t)vp9_dc_uv_quant(Q, pc->uvdc_delta_q); /* all the ac values =; */ for (i = 1; i < 16; i++) { int rc = vp9_default_zig_zag1d_4x4[i]; - pc->Y1dequant[Q][rc] = (short)vp9_ac_yquant(Q); - pc->Y2dequant[Q][rc] = (short)vp9_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][rc] = (short)vp9_ac_uv_quant(Q, pc->uvac_delta_q); + pc->Y1dequant[Q][rc] = (int16_t)vp9_ac_yquant(Q); + pc->Y2dequant[Q][rc] = (int16_t)vp9_ac2quant(Q, pc->y2ac_delta_q); + pc->UVdequant[Q][rc] = (int16_t)vp9_ac_uv_quant(Q, pc->uvac_delta_q); } } } @@ -283,10 +283,10 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; int idx = (ib & 0x02) ? (ib + 2) : ib; - short *q = xd->block[idx].qcoeff; - short *dq = xd->block[0].dequant; - unsigned char *pre = xd->block[ib].predictor; - unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; + int16_t *q = xd->block[idx].qcoeff; + int16_t *dq = xd->block[0].dequant; + uint8_t *pre = xd->block[ib].predictor; + uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; int stride = xd->dst.y_stride; BLOCKD *b = &xd->block[ib]; if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { @@ -580,8 +580,8 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; int idx = (ib & 0x02) ?
(ib + 2) : ib; - short *q = xd->block[idx].qcoeff; - short *dq = xd->block[0].dequant; + int16_t *q = xd->block[idx].qcoeff; + int16_t *dq = xd->block[0].dequant; int stride = xd->dst.y_stride; BLOCKD *b = &xd->block[ib]; tx_type = get_tx_type_8x8(xd, &xd->block[ib]); diff --git a/vp9/decoder/vp9_decodframe.h b/vp9/decoder/vp9_decodframe.h index 2a6547ed9..ae25428c4 100644 --- a/vp9/decoder/vp9_decodframe.h +++ b/vp9/decoder/vp9_decodframe.h @@ -16,4 +16,4 @@ struct VP9Decompressor; extern void vp9_init_de_quantizer(struct VP9Decompressor *pbi); -#endif // __INC_DECODFRAME_H +#endif // VP9_DECODER_VP9_DECODFRAME_H_ diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 9e382914b..4376dc3d3 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -188,7 +188,7 @@ void vp9_dequantize_b_2x2_c(BLOCKD *d) { void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, - int stride, int dc, uint16_t eobs) { + int stride, int dc, int eob) { int16_t output[64]; int16_t *diff_ptr = output; int i; @@ -204,10 +204,10 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, * TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. * Combine that with code here. */ - if (eobs == 0) { + if (eob == 0) { /* All 0 DCT coefficient */ vp9_copy_mem8x8(pred, pitch, dest, stride); - } else if (eobs == 1) { + } else if (eob == 1) { /* DC only DCT coefficient. */ int16_t out; @@ -220,7 +220,7 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq, input[0] = 0; add_constant_residual(out, pred, pitch, dest, stride, 8, 8); - } else if (eobs <= 10) { + } else if (eob <= 10) { input[1] = input[1] * dq[1]; input[2] = input[2] * dq[1]; input[3] = input[3] * dq[1]; @@ -280,17 +280,17 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, - int stride, uint16_t eobs) { + int stride, int eob) { int16_t output[256]; int16_t *diff_ptr = output; int i; /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ - if (eobs == 0) { + if (eob == 0) { /* All 0 DCT coefficient */ vp9_copy_mem16x16(pred, pitch, dest, stride); - } else if (eobs == 1) { + } else if (eob == 1) { /* DC only DCT coefficient. 
*/ int16_t out; @@ -303,7 +303,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, input[0] = 0; add_constant_residual(out, pred, pitch, dest, stride, 16, 16); - } else if (eobs <= 10) { + } else if (eob <= 10) { input[0]= input[0] * dq[0]; input[1] = input[1] * dq[1]; input[2] = input[2] * dq[1]; @@ -343,8 +343,8 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS void vp9_dequant_idct_add_32x32(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, - int stride, uint16_t eobs) { - short output[1024]; + int stride, int eob) { + int16_t output[1024]; int i; input[0]= input[0] * dq[0] / 2; @@ -356,11 +356,11 @@ void vp9_dequant_idct_add_32x32(int16_t *input, const int16_t *dq, add_residual(output, pred, pitch, dest, stride, 32, 32); } -void vp9_dequant_idct_add_uv_block_16x16_c(short *q, const short *dq, - unsigned char *dstu, - unsigned char *dstv, +void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, + uint8_t *dstu, + uint8_t *dstv, int stride, - unsigned short *eobs) { + uint16_t *eobs) { vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, eobs[0]); vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, eobs[4]); diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 8a6bf2b26..c578608ba 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -11,92 +11,108 @@ #ifndef VP9_DECODER_VP9_DEQUANTIZE_H_ #define VP9_DECODER_VP9_DEQUANTIZE_H_ + #include "vp9/common/vp9_blockd.h" #if CONFIG_LOSSLESS -extern void vp9_dequant_idct_add_lossless_c(short *input, const short *dq, - unsigned char *pred, - unsigned char *output, +extern void vp9_dequant_idct_add_lossless_c(int16_t *input, + const int16_t *dq, + uint8_t *pred, + uint8_t *output, int pitch, int stride); -extern void vp9_dequant_dc_idct_add_lossless_c(short *input, const short *dq, - unsigned char *pred, - unsigned char *output, +extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, + const int16_t *dq, + uint8_t *pred, + uint8_t *output, int pitch, int stride, int dc); -extern void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, - const short *dq, - unsigned char *pre, - unsigned char *dst, +extern void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, + const int16_t *dq, + uint8_t *pre, + uint8_t *dst, int stride, - unsigned short *eobs, - const short *dc); -extern void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, + uint16_t *eobs, + const int16_t *dc); +extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, + const int16_t *dq, + uint8_t *pre, + uint8_t *dst, int stride, - unsigned short *eobs); -extern void vp9_dequant_idct_add_uv_block_lossless_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst_u, - unsigned char *dst_v, + uint16_t *eobs); +extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, + const int16_t *dq, + uint8_t *pre, + uint8_t *dst_u, + uint8_t *dst_v, int stride, - unsigned short *eobs); -#endif + uint16_t *eobs); +#endif // CONFIG_LOSSLESS -typedef void (*vp9_dequant_idct_add_fn_t)(short *input, const short *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride); -typedef void(*vp9_dequant_dc_idct_add_fn_t)(short *input, const short *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); +typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const 
int16_t *dq, + uint8_t *pred, uint8_t *output, + int pitch, int stride); +typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *output, + int pitch, int stride, int dc); -typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, - const short *dc); -typedef void(*vp9_dequant_idct_add_y_block_fn_t)(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs); -typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, const short *dq, - unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - unsigned short *eobs); +typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, + const int16_t *dq, + uint8_t *pre, uint8_t *dst, + int stride, uint16_t *eobs, + const int16_t *dc); +typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst, + int stride, uint16_t *eobs); +typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst_u, + uint8_t *dst_v, int stride, + uint16_t *eobs); -void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq, - unsigned char *pred, unsigned char *dest, - int pitch, int stride); +void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, + const int16_t *dq, + uint8_t *pred, uint8_t *dest, + int pitch, int stride); -void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, - const short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride); +void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, + const int16_t *dq, uint8_t *pred, + uint8_t *dest, int pitch, int stride); -void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, - const short *dq, unsigned char *pred, - unsigned char *dest, +void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, + const int16_t *dq, uint8_t *pred, + uint8_t *dest, int pitch, int stride); #if CONFIG_SUPERBLOCKS -void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq, - unsigned char *dst, +void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, int stride, - unsigned short *eobs, - const short *dc, + uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd); -void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq, - unsigned char *dst, +void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, int stride, - unsigned short *eobs, - const short *dc, + uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd); -void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq, - unsigned char *dstu, - unsigned char *dstv, +void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dstu, + uint8_t *dstv, int stride, - unsigned short *eobs, + uint16_t *eobs, MACROBLOCKD *xd); -void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq, - unsigned char *dstu, - unsigned char *dstv, +void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dstu, + uint8_t *dstv, int stride, - unsigned short *eobs, + uint16_t *eobs, MACROBLOCKD *xd); -#endif +#endif // CONFIG_SUPERBLOCKS -#endif +#endif // VP9_DECODER_VP9_DEQUANTIZE_H_ diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 7211b9094..8fe218494 100644 --- 
a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -53,11 +53,11 @@ #define CAT5_PROB3 157 #define CAT5_PROB4 180 -static const unsigned char cat6_prob[15] = { +static const vp9_prob cat6_prob[15] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -DECLARE_ALIGNED(16, extern const unsigned char, vp9_norm[256]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); static int get_signed(BOOL_DECODER *br, int value_to_sign) { const int split = (br->range + 1) >> 1; @@ -254,7 +254,7 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; ENTROPY_CONTEXT* const A1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]); ENTROPY_CONTEXT* const L1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]); - unsigned short* const eobs = xd->eobs; + uint16_t *const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; int c, i, eobtotal = 0, seg_eob; @@ -320,7 +320,7 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context; - unsigned short* const eobs = xd->eobs; + uint16_t *const eobs = xd->eobs; const int segment_id = xd->mode_info_context->mbmi.segment_id; int c, i, eobtotal = 0, seg_eob; // Luma block @@ -374,7 +374,7 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; - unsigned short *const eobs = xd->eobs; + uint16_t *const eobs = xd->eobs; PLANE_TYPE type; int c, i, eobtotal = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -471,7 +471,7 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context; ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i]; ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i]; - unsigned short *const eobs = xd->eobs; + uint16_t *const eobs = xd->eobs; int c; c = decode_coefs(dx, xd, bc, a, l, type, tx_type, seg_eob, diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 09d354ea6..da11e57ef 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -32,4 +32,4 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc); -#endif /* DETOKENIZE_H */ +#endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 72ef52b5a..6ca73f65a 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -14,11 +14,11 @@ #include "vp9/decoder/vp9_dequantize.h" #endif -void vp9_dequant_dc_idct_add_y_block_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs, - const short *dc) { +void vp9_dequant_dc_idct_add_y_block_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dst, + int stride, uint16_t *eobs, + const int16_t *dc) { int i, j; for (i = 0; i < 4; i++) { @@ -40,11 +40,12 @@ void vp9_dequant_dc_idct_add_y_block_c(short *q, const short *dq, } #if CONFIG_SUPERBLOCKS -void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq, - unsigned char *dst, +void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, int stride, - unsigned 
short *eobs, - const short *dc, + uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd) { int i, j; @@ -65,10 +66,10 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq, } #endif -void vp9_dequant_idct_add_y_block_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs) { +void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dst, + int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 4; i++) { @@ -90,10 +91,10 @@ void vp9_dequant_idct_add_y_block_c(short *q, const short *dq, } } -void vp9_dequant_idct_add_uv_block_c(short *q, const short *dq, - unsigned char *pre, unsigned char *dstu, - unsigned char *dstv, int stride, - unsigned short *eobs) { +void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dstu, + uint8_t *dstv, int stride, + uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { @@ -134,11 +135,11 @@ void vp9_dequant_idct_add_uv_block_c(short *q, const short *dq, } #if CONFIG_SUPERBLOCKS -void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq, - unsigned char *dstu, - unsigned char *dstv, +void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, + uint8_t *dstu, + uint8_t *dstv, int stride, - unsigned short *eobs, + uint16_t *eobs, MACROBLOCKD *xd) { int i, j; @@ -176,11 +177,11 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq, } #endif -void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs, - const short *dc, +void vp9_dequant_dc_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dst, + int stride, uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd) { q[0] = dc[0]; vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]); @@ -200,11 +201,12 @@ void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, const short *dq, } #if CONFIG_SUPERBLOCKS -void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq, - unsigned char *dst, +void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, + const int16_t *dq, + uint8_t *dst, int stride, - unsigned short *eobs, - const short *dc, + uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd) { q[0] = dc[0]; vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]); @@ -225,13 +227,13 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq, } #endif -void vp9_dequant_idct_add_y_block_8x8_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs, +void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dst, + int stride, uint16_t *eobs, MACROBLOCKD *xd) { - unsigned char *origdest = dst; - unsigned char *origpred = pre; + uint8_t *origdest = dst; + uint8_t *origpred = pre; vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]); vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, @@ -243,11 +245,11 @@ void vp9_dequant_idct_add_y_block_8x8_c(short *q, const short *dq, xd->eobs[12]); } -void vp9_dequant_idct_add_uv_block_8x8_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dstu, - unsigned char *dstv, - int stride, unsigned short *eobs, +void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dstu, + uint8_t *dstv, + int stride, uint16_t *eobs, 
MACROBLOCKD *xd) { vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]); @@ -258,11 +260,11 @@ void vp9_dequant_idct_add_uv_block_8x8_c(short *q, const short *dq, } #if CONFIG_SUPERBLOCKS -void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq, - unsigned char *dstu, - unsigned char *dstv, +void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, + uint8_t *dstu, + uint8_t *dstv, int stride, - unsigned short *eobs, + uint16_t *eobs, MACROBLOCKD *xd) { vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0, xd->eobs[16]); @@ -274,12 +276,12 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq, #endif #if CONFIG_LOSSLESS -void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, +void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dst, int stride, - unsigned short *eobs, - const short *dc) { + uint16_t *eobs, + const int16_t *dc) { int i, j; for (i = 0; i < 4; i++) { @@ -300,10 +302,10 @@ void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, const short *dq, } } -void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dst, - int stride, unsigned short *eobs) { +void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dst, + int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 4; i++) { @@ -325,12 +327,12 @@ void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short *dq, } } -void vp9_dequant_idct_add_uv_block_lossless_c(short *q, const short *dq, - unsigned char *pre, - unsigned char *dstu, - unsigned char *dstv, +void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, + uint8_t *pre, + uint8_t *dstu, + uint8_t *dstv, int stride, - unsigned short *eobs) { + uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { diff --git a/vp9/common/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h similarity index 97% rename from vp9/common/vp9_onyxd.h rename to vp9/decoder/vp9_onyxd.h index 807676fdc..93321ef34 100644 --- a/vp9/common/vp9_onyxd.h +++ b/vp9/decoder/vp9_onyxd.h @@ -8,15 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_COMMON_VP9_ONYXD_H_ #define VP9_COMMON_VP9_ONYXD_H_ - /* Create/destroy static data structures. 
*/ #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" @@ -64,4 +61,4 @@ extern "C" } #endif -#endif // __INC_ONYXD_H +#endif // VP9_COMMON_VP9_ONYXD_H_ diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 9c0b581be..b6b686377 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -13,7 +13,7 @@ #if CONFIG_POSTPROC #include "vp9/common/vp9_postproc.h" #endif -#include "vp9/common/vp9_onyxd.h" +#include "vp9/decoder/vp9_onyxd.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_alloccommon.h" @@ -37,7 +37,7 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx); #if WRITE_RECON_BUFFER == 1 static void recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s) { FILE *yuv_file = fopen((char *)name, "ab"); - unsigned char *src = s->y_buffer; + uint8_t *src = s->y_buffer; int h = s->y_height; do { diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 73229ed83..6b7184fbe 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -8,11 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_DECODER_VP9_ONYXD_INT_H_ #define VP9_DECODER_VP9_ONYXD_INT_H_ + #include "vpx_ports/config.h" -#include "vp9/common/vp9_onyxd.h" +#include "vp9/decoder/vp9_onyxd.h" #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/decoder/vp9_dequantize.h" @@ -103,4 +103,4 @@ int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end); } while(0) #endif -#endif // __INC_ONYXD_INT_H +#endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/vp9/decoder/vp9_reconintra_mt.h b/vp9/decoder/vp9_reconintra_mt.h deleted file mode 100644 index af9634936..000000000 --- a/vp9/decoder/vp9_reconintra_mt.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_RECONINTRA_MT_H_ -#define VP9_DECODER_VP9_RECONINTRA_MT_H_ - -#endif diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h index aa31dc542..cca017de4 100644 --- a/vp9/decoder/vp9_treereader.h +++ b/vp9/decoder/vp9_treereader.h @@ -34,4 +34,4 @@ static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */ return -i; } -#endif /* tree_reader_h */ +#endif // VP9_DECODER_VP9_TREEREADER_H_ diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index 5a63d6e1b..f7a8ece64 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -14,4 +14,4 @@ void vp9_update_skip_probs(VP9_COMP *cpi); -#endif +#endif // VP9_ENCODER_VP9_BITSTREAM_H_ diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 4ad095fb9..f5cfbd1a1 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree.
*/ - #ifndef VP9_ENCODER_VP9_BLOCK_H_ #define VP9_ENCODER_VP9_BLOCK_H_ @@ -26,32 +25,32 @@ typedef struct { typedef struct block { // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - short *src_diff; - short *coeff; + int16_t *src_diff; + int16_t *coeff; // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - short *quant; - short *quant_fast; // fast quant deprecated for now - unsigned char *quant_shift; - short *zbin; - short *zbin_8x8; - short *zbin_16x16; + int16_t *quant; + int16_t *quant_fast; // fast quant deprecated for now + uint8_t *quant_shift; + int16_t *zbin; + int16_t *zbin_8x8; + int16_t *zbin_16x16; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - short *zbin_32x32; + int16_t *zbin_32x32; #endif - short *zrun_zbin_boost; - short *zrun_zbin_boost_8x8; - short *zrun_zbin_boost_16x16; + int16_t *zrun_zbin_boost; + int16_t *zrun_zbin_boost_8x8; + int16_t *zrun_zbin_boost_16x16; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - short *zrun_zbin_boost_32x32; + int16_t *zrun_zbin_boost_32x32; #endif - short *round; + int16_t *round; // Zbin Over Quant value short zbin_extra; - unsigned char **base_src; - unsigned char **base_second_src; + uint8_t **base_src; + uint8_t **base_second_src; int src; int src_stride; @@ -94,16 +93,16 @@ typedef struct { #if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 typedef struct superblock { - DECLARE_ALIGNED(16, short, src_diff[32*32+16*16*2]); - DECLARE_ALIGNED(16, short, coeff[32*32+16*16*2]); + DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]); + DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]); } SUPERBLOCK; #endif typedef struct macroblock { - DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y - DECLARE_ALIGNED(16, short, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y + DECLARE_ALIGNED(16, int16_t, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y + DECLARE_ALIGNED(16, int16_t, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y #if !CONFIG_SUPERBLOCKS - DECLARE_ALIGNED(16, unsigned char, thismb[256]); // 16x16 Y + DECLARE_ALIGNED(16, uint8_t, thismb[256]); // 16x16 Y unsigned char *thismb_ptr; #endif @@ -188,19 +187,17 @@ typedef struct macroblock { PICK_MODE_CONTEXT sb_context[4]; #endif - void (*vp9_short_fdct4x4)(short *input, short *output, int pitch); - void (*vp9_short_fdct8x4)(short *input, short *output, int pitch); - void (*short_walsh4x4)(short *input, short *output, int pitch); + void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch); + void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch); + void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d); void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); - void (*vp9_short_fdct8x8)(short *input, short *output, int pitch); - void (*vp9_short_fdct16x16)(short *input, short *output, int pitch); - void (*short_fhaar2x2)(short *input, short *output, int pitch); + void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch); + void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch); + void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d); void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); - } MACROBLOCK; - -#endif +#endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_boolhuff.h b/vp9/encoder/vp9_boolhuff.h index 0d42ecfc4..0be4b53c1 100644 --- a/vp9/encoder/vp9_boolhuff.h +++ b/vp9/encoder/vp9_boolhuff.h @@ -109,4 +109,4 @@ static 
void encode_bool(BOOL_CODER *br, int bit, int probability) { br->range = range; } -#endif +#endif // VP9_ENCODER_VP9_BOOLHUFF_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a6fab72ae..63fc1a949 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -80,7 +80,7 @@ unsigned int b_modes[B_MODE_COUNT]; * Eventually this should be replaced by custom no-reference routines, * which will be faster. */ -static const unsigned char VP9_VAR_OFFS[16] = { +static const uint8_t VP9_VAR_OFFS[16] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; @@ -1263,9 +1263,9 @@ void encode_sb_row(VP9_COMP *cpi, MODE_INFO *mic = xd->mode_info_context; PARTITION_INFO *pi = x->partition_info; signed char *gfa = x->gf_active_ptr; - unsigned char *yb = x->src.y_buffer; - unsigned char *ub = x->src.u_buffer; - unsigned char *vb = x->src.v_buffer; + uint8_t *yb = x->src.y_buffer; + uint8_t *ub = x->src.u_buffer; + uint8_t *vb = x->src.v_buffer; #endif #if CONFIG_SUPERBLOCKS diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 8c1716fdf..1b056e163 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -18,4 +18,4 @@ extern void vp9_build_block_offsets(struct macroblock *x); extern void vp9_setup_block_ptrs(struct macroblock *x); -#endif // __INC_ENCODEFRAME_H +#endif // VP9_ENCODER_VP9_ENCODEFRAME_H_ diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index 91c410d08..b017673ee 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -22,4 +22,4 @@ void vp9_encode_intra8x8mby(MACROBLOCK *x); void vp9_encode_intra8x8mbuv(MACROBLOCK *x); void vp9_encode_intra8x8(MACROBLOCK *x, int ib); -#endif // __ENCODEINTRA_H_ +#endif // VP9_ENCODER_VP9_ENCODEINTRA_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 6b175085f..66ec31c83 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -21,9 +21,9 @@ #include "vp9_rtcd.h" void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { - unsigned char *src_ptr = (*(be->base_src) + be->src); - short *diff_ptr = be->src_diff; - unsigned char *pred_ptr = bd->predictor; + uint8_t *src_ptr = (*(be->base_src) + be->src); + int16_t *diff_ptr = be->src_diff; + uint8_t *pred_ptr = bd->predictor; int src_stride = be->src_stride; int r, c; @@ -40,9 +40,9 @@ void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { } void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { - unsigned char *src_ptr = (*(be->base_src) + be->src); - short *diff_ptr = be->src_diff; - unsigned char *pred_ptr = bd->predictor; + uint8_t *src_ptr = (*(be->base_src) + be->src); + int16_t *diff_ptr = be->src_diff; + uint8_t *pred_ptr = bd->predictor; int src_stride = be->src_stride; int r, c; @@ -56,12 +56,12 @@ void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) { } } -void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc, - const unsigned char *vsrc, int src_stride, - const unsigned char *upred, - const unsigned char *vpred, int dst_stride) { - short *udiff = diff + 256; - short *vdiff = diff + 320; +void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride) { + int16_t *udiff = diff + 256; + int16_t *vdiff = diff + 320; int r, c; for (r = 0; r < 8; r++) { @@ -85,16 +85,16 @@ void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc, } } -void 
vp9_subtract_mbuv_c(short *diff, unsigned char *usrc, - unsigned char *vsrc, unsigned char *pred, int stride) { - unsigned char *upred = pred + 256; - unsigned char *vpred = pred + 320; +void vp9_subtract_mbuv_c(int16_t *diff, uint8_t *usrc, + uint8_t *vsrc, uint8_t *pred, int stride) { + uint8_t *upred = pred + 256; + uint8_t *vpred = pred + 320; vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8); } -void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride, - const unsigned char *pred, int dst_stride) { +void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride) { int r, c; for (r = 0; r < 16; r++) { @@ -109,8 +109,8 @@ void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride, } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS -void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride, - const unsigned char *pred, int dst_stride) { +void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride) { int r, c; for (r = 0; r < 32; r++) { @@ -124,12 +124,12 @@ void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride, } } -void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc, - const unsigned char *vsrc, int src_stride, - const unsigned char *upred, - const unsigned char *vpred, int dst_stride) { - short *udiff = diff + 1024; - short *vdiff = diff + 1024 + 256; +void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride) { + int16_t *udiff = diff + 1024; + int16_t *vdiff = diff + 1024 + 256; int r, c; for (r = 0; r < 16; r++) { @@ -154,8 +154,8 @@ void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc, } #endif -void vp9_subtract_mby_c(short *diff, unsigned char *src, - unsigned char *pred, int stride) { +void vp9_subtract_mby_c(int16_t *diff, uint8_t *src, + uint8_t *pred, int stride) { vp9_subtract_mby_s_c(diff, src, stride, pred, 16); } @@ -169,7 +169,7 @@ static void subtract_mb(MACROBLOCK *x) { } static void build_dcblock_4x4(MACROBLOCK *x) { - short *src_diff_ptr = &x->src_diff[384]; + int16_t *src_diff_ptr = &x->src_diff[384]; int i; for (i = 0; i < 16; i++) { @@ -368,9 +368,9 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, BLOCKD *d = &mb->e_mbd.block[i]; vp9_token_state tokens[257][2]; unsigned best_index[257][2]; - const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; + const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; + int16_t *qcoeff_ptr = d->qcoeff; + int16_t *dqcoeff_ptr = d->dqcoeff; int eob = d->eob, final_eob, sz = 0; int i0 = (type == PLANE_TYPE_Y_NO_DC); int rc, x, next; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 3c0a0a5a2..37aae13eb 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_ENCODER_VP9_ENCODEMB_H_ #define VP9_ENCODER_VP9_ENCODEMB_H_ @@ -57,21 +56,21 @@ void vp9_fidct_mb(MACROBLOCK *x); void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); #if CONFIG_SUPERBLOCKS -void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc, - const unsigned char *vsrc, int src_stride, - const unsigned char *upred, - const unsigned char *vpred, int dst_stride); -void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, - int src_stride, const unsigned char *pred, +void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride); +void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, + int src_stride, const uint8_t *pred, int dst_stride); #if CONFIG_TX32X32 -void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride, - const unsigned char *pred, int dst_stride); -void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc, - const unsigned char *vsrc, int src_stride, - const unsigned char *upred, - const unsigned char *vpred, int dst_stride); -#endif -#endif +void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride); +void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride); +#endif // CONFIG_TX32X32 +#endif // CONFIG_SUPERBLOCKS -#endif +#endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index 84cd6fb09..982c952c1 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -30,4 +30,5 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv); void print_nmvcounts(nmv_context_counts tnmvcounts); -#endif + +#endif // VP9_ENCODER_VP9_ENCODEMV_H_ diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 37f270e15..8448de7f9 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -295,7 +295,7 @@ static const double weight_table[256] = { static double simple_weight(YV12_BUFFER_CONFIG *source) { int i, j; - unsigned char *src = source->y_buffer; + uint8_t *src = source->y_buffer; double sum_weights = 0.0; // Loop throught the Y plane raw examining levels and creating a weight for the image @@ -344,15 +344,15 @@ static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; - unsigned char *src_ptr = (*(b->base_src) + b->src); + uint8_t *src_ptr = (*(b->base_src) + b->src); int src_stride = b->src_stride; - unsigned char *ref_ptr; + uint8_t *ref_ptr; int ref_stride = d->pre_stride; // Set up pointers for this macro block recon buffer xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; - ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre); + ref_ptr = (uint8_t *)(*(d->base_pre) + d->pre); vp9_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 52a48f23b..19bc4d67d 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -8,8 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - -#if !defined __INC_FIRSTPASS_H +#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_ #define VP9_ENCODER_VP9_FIRSTPASS_H_ extern void vp9_init_first_pass(VP9_COMP *cpi); @@ -20,4 +19,4 @@ extern void vp9_init_second_pass(VP9_COMP *cpi); extern void vp9_second_pass(VP9_COMP *cpi); extern void vp9_end_second_pass(VP9_COMP *cpi); -#endif +#endif // VP9_ENCODER_VP9_FIRSTPASS_H_ diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h index da2910c1c..a7aad46a5 100644 --- a/vp9/encoder/vp9_lookahead.h +++ b/vp9/encoder/vp9_lookahead.h @@ -7,8 +7,10 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ + #ifndef VP9_ENCODER_VP9_LOOKAHEAD_H_ #define VP9_ENCODER_VP9_LOOKAHEAD_H_ + #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" @@ -101,5 +103,4 @@ vp9_lookahead_peek(struct lookahead_ctx *ctx, unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx); - -#endif +#endif // VP9_ENCODER_VP9_LOOKAHEAD_H_ diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h index 01ab18f30..db23eca33 100644 --- a/vp9/encoder/vp9_mbgraph.h +++ b/vp9/encoder/vp9_mbgraph.h @@ -13,4 +13,4 @@ extern void vp9_update_mbgraph_stats(VP9_COMP *cpi); -#endif /* __INC_MBGRAPH_H__ */ +#endif // VP9_ENCODER_VP9_MBGRAPH_H_ diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 61ccc7ecb..6bce1adbf 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -251,7 +251,7 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1) { - unsigned char *z = (*(b->base_src) + b->src); + uint8_t *z = (*(b->base_src) + b->src); MACROBLOCKD *xd = &x->e_mbd; int rr, rc, br, bc, hstep; @@ -270,8 +270,9 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int usehp = xd->allow_high_precision_mv; #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - unsigned char *y; + uint8_t *y0 = *(d->base_pre) + d->pre + + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; + uint8_t *y; int buf_r1, buf_r2, buf_c1, buf_c2; // Clamping to avoid out-of-range data access @@ -289,7 +290,8 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2); y = xd->y_buf + y_stride * buf_r1 + buf_c1; #else - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; + uint8_t *y = *(d->base_pre) + d->pre + + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; y_stride = d->pre_stride; #endif @@ -454,7 +456,7 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv this_mv; int_mv orig_mv; int yrow_movedback = 0, ycol_movedback = 0; - unsigned char *z = (*(b->base_src) + b->src); + uint8_t *z = (*(b->base_src) + b->src); int left, right, up, down, diag; unsigned int sse; int whichdir; @@ -464,15 +466,17 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int usehp = xd->allow_high_precision_mv; #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - unsigned char *y; + uint8_t *y0 = *(d->base_pre) + d->pre + + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; + uint8_t *y; y_stride = 32; 
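(Sketch, not part of the patch.) The vfp->copymem call on the next line copies an 18-row by 32-column region -- the 16x16 block plus a one-pixel apron -- into xd->y_buf, so that every half-pel probe around the best integer position reads through one fixed stride instead of the frame's own pre_stride; y then points one row and one column inside the copy. A plain-C sketch of that pattern, with a hypothetical name:

#include <stdint.h>
#include <string.h>

/* Copy `rows` rows of `dst_stride` pixels into an intermediate buffer.
 * With rows == 18 and dst_stride == 32 as here, dst + dst_stride + 1
 * addresses the 16x16 block with a 1-pixel border on every side. */
static void copy_with_apron(const uint8_t *src, int src_stride,
                            uint8_t *dst, int dst_stride, int rows) {
  int r;
  for (r = 0; r < rows; r++)
    memcpy(dst + r * dst_stride, src + r * src_stride, dst_stride);
}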
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; + uint8_t *y = *(d->base_pre) + d->pre + + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; y_stride = d->pre_stride; #endif @@ -933,7 +937,7 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int bestmse = INT_MAX; int_mv startmv; int_mv this_mv; - unsigned char *z = (*(b->base_src) + b->src); + uint8_t *z = (*(b->base_src) + b->src); int left, right, up, down, diag; unsigned int sse; int whichdir; @@ -942,16 +946,16 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MACROBLOCKD *xd = &x->e_mbd; #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - unsigned char *y0 = *(d->base_pre) + d->pre + + uint8_t *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; - unsigned char *y; + uint8_t *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else - unsigned char *y = *(d->base_pre) + d->pre + + uint8_t *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; y_stride = d->pre_stride; #endif @@ -1118,15 +1122,15 @@ int vp9_hex_search MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; int i, j; - unsigned char *what = (*(b->base_src) + b->src); + uint8_t *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; int in_what_stride = d->pre_stride; int br, bc; int_mv this_mv; unsigned int bestsad = 0x7fffffff; unsigned int thissad; - unsigned char *base_offset; - unsigned char *this_offset; + uint8_t *base_offset; + uint8_t *this_offset; int k = -1; int all_in; int best_site = -1; @@ -1141,7 +1145,7 @@ int vp9_hex_search bc = ref_mv->as_mv.col; // Work out the start point for the search - base_offset = (unsigned char *)(*(d->base_pre) + d->pre); + base_offset = (uint8_t *)(*(d->base_pre) + d->pre); this_offset = base_offset + (br * (d->pre_stride)) + bc; this_mv.as_mv.row = br; this_mv.as_mv.col = bc; @@ -1264,11 +1268,11 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int *mvcost[2], int_mv *center_mv) { int i, j, step; - unsigned char *what = (*(b->base_src) + b->src); + uint8_t *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; - unsigned char *in_what; + uint8_t *in_what; int in_what_stride = d->pre_stride; - unsigned char *best_address; + uint8_t *best_address; int tot_steps; int_mv this_mv; @@ -1281,7 +1285,7 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int this_row_offset, this_col_offset; search_site *ss; - unsigned char *check_here; + uint8_t *check_here; int thissad; MACROBLOCKD *xd = &x->e_mbd; int_mv fcenter_mv; @@ -1300,7 +1304,8 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, best_mv->as_mv.col = ref_col; // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); + in_what = (uint8_t *)(*(d->base_pre) + d->pre + + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; // Check the starting position @@ -1374,11 +1379,11 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int *mvjcost, int *mvcost[2], int_mv *center_mv) { int i, j, step; 
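(Sketch, not part of the patch.) The diamond, full and refining searches below all minimize a sum of absolute differences between the source block (`what`) and each candidate reference position (`check_here`), tracked in `bestsad`/`thissad`. The real fn_ptr->sdf kernels are fixed-size and SIMD-optimized; a reference-style sketch of the cost they compute, with a hypothetical name:

#include <stdint.h>
#include <stdlib.h>

/* Sum of absolute differences between a w x h source block and a
 * reference block, each addressed through its own stride. */
static unsigned int sad_sketch(const uint8_t *what, int what_stride,
                               const uint8_t *in_what, int in_what_stride,
                               int w, int h) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < h; r++) {
    for (c = 0; c < w; c++)
      sad += abs(what[c] - in_what[c]);
    what += what_stride;
    in_what += in_what_stride;
  }
  return sad;
}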
- unsigned char *what = (*(b->base_src) + b->src); + uint8_t *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; - unsigned char *in_what; + uint8_t *in_what; int in_what_stride = d->pre_stride; - unsigned char *best_address; + uint8_t *best_address; int tot_steps; int_mv this_mv; @@ -1393,7 +1398,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int this_col_offset; search_site *ss; - unsigned char *check_here; + uint8_t *check_here; unsigned int thissad; MACROBLOCKD *xd = &x->e_mbd; int_mv fcenter_mv; @@ -1412,7 +1417,8 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, best_mv->as_mv.col = ref_col; // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); + in_what = (uint8_t *)(*(d->base_pre) + d->pre + + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; // Check the starting position @@ -1580,18 +1586,18 @@ int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { - unsigned char *what = (*(b->base_src) + b->src); + uint8_t *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; - unsigned char *in_what; + uint8_t *in_what; int in_what_stride = d->pre_stride; int mv_stride = d->pre_stride; - unsigned char *bestaddress; + uint8_t *bestaddress; int_mv *best_mv = &d->bmi.as_mv.first; int_mv this_mv; int bestsad = INT_MAX; int r, c; - unsigned char *check_here; + uint8_t *check_here; int thissad; MACROBLOCKD *xd = &x->e_mbd; @@ -1675,18 +1681,18 @@ int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { - unsigned char *what = (*(b->base_src) + b->src); + uint8_t *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; - unsigned char *in_what; + uint8_t *in_what; int in_what_stride = d->pre_stride; int mv_stride = d->pre_stride; - unsigned char *bestaddress; + uint8_t *bestaddress; int_mv *best_mv = &d->bmi.as_mv.first; int_mv this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned char *check_here; + uint8_t *check_here; unsigned int thissad; MACROBLOCKD *xd = &x->e_mbd; @@ -1804,18 +1810,18 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], int_mv *center_mv) { - unsigned char *what = (*(b->base_src) + b->src); + uint8_t *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; - unsigned char *in_what; + uint8_t *in_what; int in_what_stride = d->pre_stride; int mv_stride = d->pre_stride; - unsigned char *bestaddress; + uint8_t *bestaddress; int_mv *best_mv = &d->bmi.as_mv.first; int_mv this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned char *check_here; + uint8_t *check_here; unsigned int thissad; MACROBLOCKD *xd = &x->e_mbd; @@ -1827,7 +1833,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int col_min = ref_col - distance; int col_max = ref_col + distance; - DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); + DECLARE_ALIGNED_ARRAY(16, uint16_t, sad_array8, 8); unsigned int sad_array[3]; int_mv fcenter_mv; @@ -1959,14 +1965,15 @@ int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int *mvjcost, int *mvcost[2], int_mv *center_mv) { MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; - short 
this_row_offset, this_col_offset; + int this_row_offset, this_col_offset; int what_stride = b->src_stride; int in_what_stride = d->pre_stride; - unsigned char *what = (*(b->base_src) + b->src); - unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + - (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col); - unsigned char *check_here; + uint8_t *what = (*(b->base_src) + b->src); + uint8_t *best_address = (uint8_t *)(*(d->base_pre) + d->pre + + (ref_mv->as_mv.row * (d->pre_stride)) + + ref_mv->as_mv.col); + uint8_t *check_here; unsigned int thissad; int_mv this_mv; unsigned int bestsad = INT_MAX; @@ -2036,14 +2043,15 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int *mvjcost, int *mvcost[2], int_mv *center_mv) { MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; int i, j; - short this_row_offset, this_col_offset; + int this_row_offset, this_col_offset; int what_stride = b->src_stride; int in_what_stride = d->pre_stride; - unsigned char *what = (*(b->base_src) + b->src); - unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + - (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col); - unsigned char *check_here; + uint8_t *what = (*(b->base_src) + b->src); + uint8_t *best_address = (uint8_t *)(*(d->base_pre) + d->pre + + (ref_mv->as_mv.row * (d->pre_stride)) + + ref_mv->as_mv.col); + uint8_t *check_here; unsigned int thissad; int_mv this_mv; unsigned int bestsad = INT_MAX; diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index a3eeb29db..358d10bc6 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -83,4 +83,4 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *center_mv); -#endif +#endif // VP9_ENCODER_VP9_MCOMP_H_ diff --git a/vp9/encoder/vp9_modecosts.h b/vp9/encoder/vp9_modecosts.h index 1f2cc56ee..f43033e5f 100644 --- a/vp9/encoder/vp9_modecosts.h +++ b/vp9/encoder/vp9_modecosts.h @@ -14,4 +14,4 @@ void vp9_init_mode_costs(VP9_COMP *x); -#endif +#endif // VP9_ENCODER_VP9_MODECOSTS_H_ diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 1c7871050..9e4367310 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2294,8 +2294,8 @@ void vp9_remove_compressor(VP9_PTR *ptr) { } -static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, - unsigned char *recon, int recon_stride, +static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, + uint8_t *recon, int recon_stride, unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; @@ -2311,9 +2311,9 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, /* Handle odd-sized width */ if (col < cols) { - unsigned int border_row, border_col; - unsigned char *border_orig = orig; - unsigned char *border_recon = recon; + unsigned int border_row, border_col; + uint8_t *border_orig = orig; + uint8_t *border_recon = recon; for (border_row = 0; border_row < 16; border_row++) { for (border_col = col; border_col < cols; border_col++) { @@ -2472,7 +2472,7 @@ int vp9_update_entropy(VP9_PTR comp, int update) { #ifdef OUTPUT_YUV_SRC void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) { - unsigned char *src = s->y_buffer; + uint8_t *src = s->y_buffer; int h = s->y_height; do { @@ -2501,7 +2501,7 @@ void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s) { #ifdef OUTPUT_YUV_REC void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { YV12_BUFFER_CONFIG *s = cm->frame_to_show; - unsigned char *src = s->y_buffer; + uint8_t *src = s->y_buffer; 
int h = cm->Height; do { @@ -2674,9 +2674,9 @@ static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { int i, j; int num_edge_pels = 0; int num_pels = (frame->y_height - 2) * (frame->y_width - 2); - unsigned char *prev = frame->y_buffer + 1; - unsigned char *curr = frame->y_buffer + 1 + frame->y_stride; - unsigned char *next = frame->y_buffer + 1 + 2 * frame->y_stride; + uint8_t *prev = frame->y_buffer + 1; + uint8_t *curr = frame->y_buffer + 1 + frame->y_stride; + uint8_t *next = frame->y_buffer + 1 + 2 * frame->y_stride; for (i = 1; i < frame->y_height - 1; i++) { for (j = 1; j < frame->y_width - 1; j++) { /* Sobel hor and ver gradients */ @@ -2887,13 +2887,10 @@ static void select_interintra_mode(VP9_COMP *cpi) { } #endif -static void encode_frame_to_data_rate -( - VP9_COMP *cpi, - unsigned long *size, - unsigned char *dest, - unsigned int *frame_flags -) { +static void encode_frame_to_data_rate(VP9_COMP *cpi, + unsigned long *size, + unsigned char *dest, + unsigned int *frame_flags) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &cpi->mb.e_mbd; @@ -3189,7 +3186,7 @@ static void encode_frame_to_data_rate #if CONFIG_POSTPROC if (cpi->oxcf.noise_sensitivity > 0) { - unsigned char *src; + uint8_t *src; int l = 0; switch (cpi->oxcf.noise_sensitivity) { @@ -4473,8 +4470,8 @@ int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { int i, j; int Total = 0; - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; + uint8_t *src = source->y_buffer; + uint8_t *dst = dest->y_buffer; // Loop through the Y plane raw and reconstruction data summing (square differences) for (i = 0; i < source->y_height; i += 16) { diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 2406138f6..0c2b8ccdb 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -835,4 +835,5 @@ extern void vp9_alloc_compressor_data(VP9_COMP *cpi); "Failed to allocate "#lval);\ } while(0) #endif -#endif // __INC_ONYX_INT_H + +#endif // VP9_ENCODER_VP9_ONYX_INT_H_ diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 865c45e82..556851ce0 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -21,7 +21,7 @@ void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { - unsigned char *src_y, *dst_y; + uint8_t *src_y, *dst_y; int yheight; int ystride; int border; @@ -51,8 +51,8 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, int i, j; int Total = 0; int srcoffset, dstoffset; - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; + uint8_t *src = source->y_buffer; + uint8_t *dst = dest->y_buffer; int linestocopy = (source->y_height >> (Fraction + 4)); diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h index b5c6bdd91..cb015006f 100644 --- a/vp9/encoder/vp9_picklpf.h +++ b/vp9/encoder/vp9_picklpf.h @@ -23,4 +23,4 @@ extern void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); extern void vp9_pick_filter_level(struct yv12_buffer_config *sd, struct VP9_COMP *cpi); -#endif // __INC_PICKLPF_H +#endif // VP9_ENCODER_VP9_PICKLPF_H_ diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h index 7dd5048ef..121f0dc98 100644 --- a/vp9/encoder/vp9_psnr.h +++ b/vp9/encoder/vp9_psnr.h @@ -14,4 +14,4 @@ extern double vp9_mse2psnr(double Samples, double Peak, double Mse); -#endif +#endif // VP9_ENCODER_VP9_PSNR_H_ diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 
1f5f8f72d..051bd6461 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -25,29 +25,29 @@ void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; + int16_t *zbin_boost_ptr = b->zrun_zbin_boost; + int16_t *coeff_ptr = b->coeff; + int16_t *zbin_ptr = b->zbin; + int16_t *round_ptr = b->round; + int16_t *quant_ptr = b->quant; + uint8_t *quant_shift_ptr = b->quant_shift; + int16_t *qcoeff_ptr = d->qcoeff; + int16_t *dqcoeff_ptr = d->dqcoeff; + int16_t *dequant_ptr = d->dequant; + int zbin_oq_value = b->zbin_extra; int const *pt_scan ; switch (tx_type) { - case ADST_DCT : + case ADST_DCT: pt_scan = vp9_row_scan_4x4; break; - case DCT_ADST : + case DCT_ADST: pt_scan = vp9_col_scan_4x4; break; - default : + default: pt_scan = vp9_default_zig_zag1d_4x4; break; } @@ -89,16 +89,16 @@ void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; + int16_t *zbin_boost_ptr = b->zrun_zbin_boost; + int16_t *coeff_ptr = b->coeff; + int16_t *zbin_ptr = b->zbin; + int16_t *round_ptr = b->round; + int16_t *quant_ptr = b->quant; + uint8_t *quant_shift_ptr = b->quant_shift; + int16_t *qcoeff_ptr = d->qcoeff; + int16_t *dqcoeff_ptr = d->dqcoeff; + int16_t *dequant_ptr = d->dequant; + int zbin_oq_value = b->zbin_extra; vpx_memset(qcoeff_ptr, 0, 32); vpx_memset(dqcoeff_ptr, 0, 32); @@ -174,17 +174,17 @@ void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost; + int16_t *zbin_boost_ptr = b->zrun_zbin_boost; int zbin_zrun_index = 0; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; + int16_t *coeff_ptr = b->coeff; + int16_t *zbin_ptr = b->zbin; + int16_t *round_ptr = b->round; + int16_t *quant_ptr = b->quant; + uint8_t *quant_shift_ptr = b->quant_shift; + int16_t *qcoeff_ptr = d->qcoeff; + int16_t *dqcoeff_ptr = d->dqcoeff; + int16_t *dequant_ptr = d->dequant; + int zbin_oq_value = b->zbin_extra; // double q2nd = 4; vpx_memset(qcoeff_ptr, 0, 32); vpx_memset(dqcoeff_ptr, 0, 32); @@ -224,19 +224,19 @@ void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = b->zrun_zbin_boost_8x8; - short *coeff_ptr = b->coeff; - short *zbin_ptr = b->zbin_8x8; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = 
b->zbin_extra; + int16_t *zbin_boost_ptr = b->zrun_zbin_boost_8x8; + int16_t *coeff_ptr = b->coeff; + int16_t *zbin_ptr = b->zbin_8x8; + int16_t *round_ptr = b->round; + int16_t *quant_ptr = b->quant; + uint8_t *quant_shift_ptr = b->quant_shift; + int16_t *qcoeff_ptr = d->qcoeff; + int16_t *dqcoeff_ptr = d->dqcoeff; + int16_t *dequant_ptr = d->dequant; + int zbin_oq_value = b->zbin_extra; - vpx_memset(qcoeff_ptr, 0, 64 * sizeof(short)); - vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(short)); + vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t)); + vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t)); eob = -1; @@ -323,20 +323,20 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x) { vp9_quantize_mbuv_8x8(x); } -static void quantize(short *zbin_boost_orig_ptr, - short *coeff_ptr, int n_coeffs, int max_coeffs, - short *zbin_ptr, short *round_ptr, short *quant_ptr, - unsigned char *quant_shift_ptr, - short *qcoeff_ptr, short *dqcoeff_ptr, - short *dequant_ptr, short zbin_oq_value, +static void quantize(int16_t *zbin_boost_orig_ptr, + int16_t *coeff_ptr, int n_coeffs, int max_coeffs, + int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, + uint8_t *quant_shift_ptr, + int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, + int16_t *dequant_ptr, int zbin_oq_value, int *eob_ptr, const int *scan, int mul) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = zbin_boost_orig_ptr; + int16_t *zbin_boost_ptr = zbin_boost_orig_ptr; - vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(short)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(short)); + vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); eob = -1; for (i = 0; i < max_coeffs; i++) { @@ -425,15 +425,15 @@ void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2, vp9_regular_quantize_b_4x4(b2, d2); } -static void invert_quant(short *quant, - unsigned char *shift, short d) { +static void invert_quant(int16_t *quant, + uint8_t *shift, int d) { unsigned t; int l; t = d; for (l = 0; t > 1; l++) t >>= 1; t = 1 + (1 << (16 + l)) / d; - *quant = (short)(t - (1 << 16)); + *quant = (int16_t)(t - (1 << 16)); *shift = l; } @@ -738,7 +738,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex]; #endif - x->block[i].zbin_extra = (short)zbin_extra; + x->block[i].zbin_extra = (int16_t)zbin_extra; // Segment max eob offset feature. if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) { @@ -780,7 +780,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex]; x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex]; - x->block[i].zbin_extra = (short)zbin_extra; + x->block[i].zbin_extra = (int16_t)zbin_extra; // Segment max eob offset feature. if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) { @@ -813,7 +813,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex]; x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex]; - x->block[24].zbin_extra = (short)zbin_extra; + x->block[24].zbin_extra = (int16_t)zbin_extra; // TBD perhaps not use for Y2 // Segment max eob offset feature. 
@@ -842,7 +842,7 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; for (i = 0; i < 16; i++) { - x->block[i].zbin_extra = (short)zbin_extra; + x->block[i].zbin_extra = (int16_t)zbin_extra; } // UV @@ -852,7 +852,7 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { x->act_zbin_adj)) >> 7; for (i = 16; i < 24; i++) { - x->block[i].zbin_extra = (short)zbin_extra; + x->block[i].zbin_extra = (int16_t)zbin_extra; } // Y2 @@ -861,7 +861,7 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; - x->block[24].zbin_extra = (short)zbin_extra; + x->block[24].zbin_extra = (int16_t)zbin_extra; } void vp9_frame_init_quantizer(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 832a486f5..dbc3246b5 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -95,4 +95,4 @@ extern void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x); extern void vp9_init_quantizer(struct VP9_COMP *cpi); -#endif +#endif // VP9_ENCODER_VP9_QUANTIZE_H_ diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index a2572116c..c6484817f 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -35,4 +35,4 @@ extern int vp9_gfboost_qadjust(int qindex); extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex); void vp9_setup_inter_frame(VP9_COMP *cpi); -#endif +#endif // VP9_ENCODER_VP9_RATECTRL_H_ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index df41bac88..3f9baef75 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -403,7 +403,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { } } -int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) { +int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { int i, error = 0; for (i = 0; i < block_size; i++) { @@ -469,9 +469,9 @@ int vp9_mbuverror_c(MACROBLOCK *mb) { } int vp9_uvsse(MACROBLOCK *x) { - unsigned char *uptr, *vptr; - unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); - unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); + uint8_t *uptr, *vptr; + uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); + uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); int uv_stride = x->block[16].src_stride; unsigned int sse1 = 0; @@ -520,7 +520,7 @@ static int cost_coeffs_2x2(MACROBLOCK *mb, int eob = b->eob; int pt; /* surrounding block/prev coef predictor */ int cost = 0; - short *qcoeff_ptr = b->qcoeff; + int16_t *qcoeff_ptr = b->qcoeff; VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); assert(eob <= 4); @@ -550,7 +550,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, int cost = 0, default_eob, seg_eob; int pt; /* surrounding block/prev coef predictor */ int const *scan, *band; - short *qcoeff_ptr = b->qcoeff; + int16_t *qcoeff_ptr = b->qcoeff; MACROBLOCKD *xd = &mb->e_mbd; MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi; TX_TYPE tx_type = DCT_DCT; @@ -928,7 +928,7 @@ static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, txfm_cache, TX_16X16); } -static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { +static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { const unsigned int *p = (const unsigned int *)predictor; unsigned int *d = (unsigned int *)dst; d[0] = p[0]; @@ -951,7 +951,8 @@ static int rdcost_sby_32x32(MACROBLOCK *x) { return 
cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); } -static int vp9_sb_block_error_c(short *coeff, short *dqcoeff, int block_size) { +static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, + int block_size) { int i; int64_t error = 0; @@ -970,7 +971,7 @@ static void super_block_yrd_32x32(MACROBLOCK *x, MACROBLOCKD * const xd = &x->e_mbd; SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; #if DEBUG_ERROR || CONFIG_DWT32X32HYBRID - short out[1024]; + int16_t out[1024]; #endif vp9_transform_sby_32x32(x); @@ -1079,7 +1080,7 @@ static void super_block_yrd(VP9_COMP *cpi, } #endif -static void copy_predictor_8x8(unsigned char *dst, const unsigned char *predictor) { +static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { const unsigned int *p = (const unsigned int *)predictor; unsigned int *d = (unsigned int *)dst; d[0] = p[0]; @@ -1129,8 +1130,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, * a temp buffer that meets the stride requirements, but we are only * interested in the left 4x4 block * */ - DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 4); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); + DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4); + DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); #if CONFIG_NEWBINTRAMODES b->bmi.as_mode.context = vp9_find_bpred_context(b); @@ -1488,8 +1489,8 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, * a temp buffer that meets the stride requirements, but we are only * interested in the left 8x8 block * */ - DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 8); - DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4); + DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 8); + DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16 * 4); // perform transformation of dimension 8x8 // note the input and output index mapping @@ -2921,7 +2922,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, } static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, - unsigned char *ref_y_buffer, int ref_y_stride, + uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, enum BlockSize block_size ) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -2933,8 +2934,8 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, int this_sad = INT_MAX; BLOCK *b = &x->block[0]; - unsigned char *src_y_ptr = *(b->base_src); - unsigned char *ref_y_ptr; + uint8_t *src_y_ptr = *(b->base_src); + uint8_t *ref_y_ptr; int row_offset, col_offset; // Get the sad for each candidate reference mv @@ -3182,9 +3183,9 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], int frame_mdcounts[4][4], - unsigned char *y_buffer[4], - unsigned char *u_buffer[4], - unsigned char *v_buffer[4]) { + uint8_t *y_buffer[4], + uint8_t *u_buffer[4], + uint8_t *v_buffer[4]) { YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; @@ -3557,7 +3558,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; int frame_mdcounts[4][4]; - unsigned char *y_buffer[4], *u_buffer[4], *v_buffer[4]; + uint8_t *y_buffer[4], *u_buffer[4], *v_buffer[4]; unsigned int ref_costs[MAX_REF_FRAMES]; int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1]; @@ -4501,9 +4502,9 @@ int64_t 
vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; int frame_mdcounts[4][4]; - unsigned char *y_buffer[4]; - unsigned char *u_buffer[4]; - unsigned char *v_buffer[4]; + uint8_t *y_buffer[4]; + uint8_t *u_buffer[4]; + uint8_t *v_buffer[4]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx, diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 4e41714f8..4c2c33a74 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -38,4 +38,4 @@ extern void vp9_init_me_luts(); extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); -#endif +#endif // VP9_ENCODER_VP9_RDOPT_H_ diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index 8e12f16db..e5249e537 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@ -14,62 +14,61 @@ #include "vpx_ports/config.h" #include "vpx/vpx_integer.h" -unsigned int vp9_sad32x32_c(const unsigned char *src_ptr, +unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32); } -unsigned int vp9_sad16x16_c(const unsigned char *src_ptr, +unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16); } -unsigned int vp9_sad8x8_c(const unsigned char *src_ptr, +unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 8); } -unsigned int vp9_sad16x8_c(const unsigned char *src_ptr, +unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 8); } -unsigned int vp9_sad8x16_c(const unsigned char *src_ptr, +unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16); } -unsigned int vp9_sad4x4_c(const unsigned char *src_ptr, +unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4); } -void vp9_sad32x32x3_c(const unsigned char *src_ptr, +void vp9_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned int *sad_array - ) { + unsigned int *sad_array) { sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff); sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride, @@ -78,41 +77,40 @@ void vp9_sad32x32x3_c(const unsigned char *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp9_sad32x32x8_c(const unsigned char *src_ptr, +void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned short *sad_array - ) { - sad_array[0] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr, 
ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad32x32_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); } -void vp9_sad16x16x3_c(const unsigned char *src_ptr, +void vp9_sad16x16x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride, @@ -123,40 +121,40 @@ void vp9_sad16x16x3_c(const unsigned char *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp9_sad16x16x8_c(const unsigned char *src_ptr, +void vp9_sad16x16x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad16x16_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + 
ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); } -void vp9_sad16x8x3_c(const unsigned char *src_ptr, +void vp9_sad16x8x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride, @@ -167,40 +165,40 @@ void vp9_sad16x8x3_c(const unsigned char *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp9_sad16x8x8_c(const unsigned char *src_ptr, +void vp9_sad16x8x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad16x8_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); } -void vp9_sad8x8x3_c(const unsigned char *src_ptr, +void vp9_sad8x8x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride, @@ -211,40 +209,40 @@ void vp9_sad8x8x3_c(const unsigned char *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp9_sad8x8x8_c(const unsigned char *src_ptr, +void vp9_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - 
ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad8x8_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); } -void vp9_sad8x16x3_c(const unsigned char *src_ptr, +void vp9_sad8x16x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride, @@ -255,40 +253,40 @@ void vp9_sad8x16x3_c(const unsigned char *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp9_sad8x16x8_c(const unsigned char *src_ptr, +void vp9_sad8x16x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad8x16_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); 
+ sad_array[5] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); } -void vp9_sad4x4x3_c(const unsigned char *src_ptr, +void vp9_sad4x4x3_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride, @@ -299,43 +297,42 @@ void vp9_sad4x4x3_c(const unsigned char *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp9_sad4x4x8_c(const unsigned char *src_ptr, +void vp9_sad4x4x8_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, - unsigned short *sad_array) { - sad_array[0] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr, ref_stride, - 0x7fffffff); - sad_array[1] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 1, ref_stride, - 0x7fffffff); - sad_array[2] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 2, ref_stride, - 0x7fffffff); - sad_array[3] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 3, ref_stride, - 0x7fffffff); - sad_array[4] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 4, ref_stride, - 0x7fffffff); - sad_array[5] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 5, ref_stride, - 0x7fffffff); - sad_array[6] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 6, ref_stride, - 0x7fffffff); - sad_array[7] = (unsigned short)vp9_sad4x4_c(src_ptr, src_stride, - ref_ptr + 7, ref_stride, - 0x7fffffff); + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); } -void vp9_sad32x32x4d_c(const unsigned char *src_ptr, +void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, - unsigned char *ref_ptr[], + uint8_t *ref_ptr[], int ref_stride, - unsigned int *sad_array - ) { + unsigned int *sad_array) { sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff); sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride, @@ -346,9 +343,9 @@ void vp9_sad32x32x4d_c(const unsigned char *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } -void vp9_sad16x16x4d_c(const unsigned char *src_ptr, +void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int src_stride, - unsigned char *ref_ptr[], + uint8_t *ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride, @@ -361,9 +358,9 @@ void vp9_sad16x16x4d_c(const unsigned char *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } -void vp9_sad16x8x4d_c(const unsigned char *src_ptr, +void 
vp9_sad16x8x4d_c(const uint8_t *src_ptr, int src_stride, - unsigned char *ref_ptr[], + uint8_t *ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride, @@ -376,9 +373,9 @@ void vp9_sad16x8x4d_c(const unsigned char *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } -void vp9_sad8x8x4d_c(const unsigned char *src_ptr, +void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, - unsigned char *ref_ptr[], + uint8_t *ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride, @@ -391,9 +388,9 @@ void vp9_sad8x8x4d_c(const unsigned char *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } -void vp9_sad8x16x4d_c(const unsigned char *src_ptr, +void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int src_stride, - unsigned char *ref_ptr[], + uint8_t *ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride, @@ -406,9 +403,9 @@ void vp9_sad8x16x4d_c(const unsigned char *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } -void vp9_sad4x4x4d_c(const unsigned char *src_ptr, +void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, - unsigned char *ref_ptr[], + uint8_t *ref_ptr[], int ref_stride, unsigned int *sad_array) { sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride, @@ -422,9 +419,9 @@ void vp9_sad4x4x4d_c(const unsigned char *src_ptr, } /* Copy 2 macroblocks to a buffer */ -void vp9_copy32xn_c(unsigned char *src_ptr, +void vp9_copy32xn_c(uint8_t *src_ptr, int src_stride, - unsigned char *dst_ptr, + uint8_t *dst_ptr, int dst_stride, int height) { int r; diff --git a/vp9/encoder/vp9_satd_c.c b/vp9/encoder/vp9_satd_c.c index 63944f0e9..212c2243d 100644 --- a/vp9/encoder/vp9_satd_c.c +++ b/vp9/encoder/vp9_satd_c.c @@ -11,16 +11,17 @@ #include #include "vpx_ports/mem.h" #include "./vp9_rtcd.h" -unsigned int vp9_satd16x16_c(const unsigned char *src_ptr, + +unsigned int vp9_satd16x16_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *psatd) { int r, c, i; unsigned int satd = 0; - DECLARE_ALIGNED(16, short, diff_in[256]); - DECLARE_ALIGNED(16, short, diff_out[16]); - short *in; + DECLARE_ALIGNED(16, int16_t, diff_in[256]); + DECLARE_ALIGNED(16, int16_t, diff_out[16]); + int16_t *in; for (r = 0; r < 16; r++) { for (c = 0; c < 16; c++) { diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h index 493a76700..3c75c68d8 100644 --- a/vp9/encoder/vp9_segmentation.h +++ b/vp9/encoder/vp9_segmentation.h @@ -43,4 +43,4 @@ extern void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data, extern void vp9_choose_segmap_coding_method(VP9_COMP *cpi); -#endif /* __INC_SEGMENTATION_H__ */ +#endif // VP9_ENCODER_VP9_SEGMENTATION_H_ diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c index 4cbb9149b..363ed8492 100644 --- a/vp9/encoder/vp9_ssim.c +++ b/vp9/encoder/vp9_ssim.c @@ -11,7 +11,7 @@ #include "vp9/encoder/vp9_onyx_int.h" -void vp9_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r, +void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr) { @@ -26,7 +26,7 @@ void vp9_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r, } } } -void vp9_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp, +void vp9_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long 
*sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr) { @@ -65,13 +65,13 @@ static double similarity(unsigned long sum_s, unsigned long sum_r, return ssim_n * 1.0 / ssim_d; } -static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) { +static double ssim_16x16(uint8_t *s, int sp, uint8_t *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vp9_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); } -static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) { +static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); @@ -81,7 +81,7 @@ static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) { // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. -double vp9_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1, +double vp9_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1, int stride_img2, int width, int height) { int i, j; int samples = 0; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 89d6b44a0..159d6faa5 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -35,19 +35,16 @@ #if VP9_TEMPORAL_ALT_REF -static void temporal_filter_predictors_mb_c -( - MACROBLOCKD *xd, - unsigned char *y_mb_ptr, - unsigned char *u_mb_ptr, - unsigned char *v_mb_ptr, - int stride, - int mv_row, - int mv_col, - unsigned char *pred -) { +static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, + uint8_t *y_mb_ptr, + uint8_t *u_mb_ptr, + uint8_t *v_mb_ptr, + int stride, + int mv_row, + int mv_col, + uint8_t *pred) { int offset; - unsigned char *yptr, *uptr, *vptr; + uint8_t *yptr, *uptr, *vptr; int omv_row, omv_col; // Y @@ -75,23 +72,20 @@ static void temporal_filter_predictors_mb_c (omv_col & 15), (omv_row & 15), &pred[256], 8); xd->subpixel_predict8x8(vptr, stride, (omv_col & 15), (omv_row & 15), &pred[320], 8); - } - else { + } else { vp9_copy_mem8x8(uptr, stride, &pred[256], 8); vp9_copy_mem8x8(vptr, stride, &pred[320], 8); } } -void vp9_temporal_filter_apply_c -( - unsigned char *frame1, - unsigned int stride, - unsigned char *frame2, - unsigned int block_size, - int strength, - int filter_weight, - unsigned int *accumulator, - unsigned short *count -) { + +void vp9_temporal_filter_apply_c(uint8_t *frame1, + unsigned int stride, + uint8_t *frame2, + unsigned int block_size, + int strength, + int filter_weight, + unsigned int *accumulator, + uint16_t *count) { unsigned int i, j, k; int modifier; int byte = 0; @@ -129,14 +123,11 @@ void vp9_temporal_filter_apply_c #if ALT_REF_MC_ENABLED -static int temporal_filter_find_matching_mb_c -( - VP9_COMP *cpi, - YV12_BUFFER_CONFIG *arf_frame, - YV12_BUFFER_CONFIG *frame_ptr, - int mb_offset, - int error_thresh -) { +static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, + YV12_BUFFER_CONFIG *arf_frame, + YV12_BUFFER_CONFIG *frame_ptr, + int mb_offset, + int error_thresh) { MACROBLOCK *x = &cpi->mb; int step_param; int further_steps; @@ -149,10 +140,10 @@ static int temporal_filter_find_matching_mb_c int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ // Save 
input state - unsigned char **base_src = b->base_src; + uint8_t **base_src = b->base_src; int src = b->src; int src_stride = b->src_stride; - unsigned char **base_pre = d->base_pre; + uint8_t **base_pre = d->base_pre; int pre = d->pre; int pre_stride = d->pre_stride; @@ -216,13 +207,10 @@ static int temporal_filter_find_matching_mb_c } #endif -static void temporal_filter_iterate_c -( - VP9_COMP *cpi, - int frame_count, - int alt_ref_index, - int strength -) { +static void temporal_filter_iterate_c(VP9_COMP *cpi, + int frame_count, + int alt_ref_index, + int strength) { int byte; int frame; int mb_col, mb_row; @@ -232,16 +220,16 @@ static void temporal_filter_iterate_c int mb_y_offset = 0; int mb_uv_offset = 0; DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8); - DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16 * 16 + 8 * 8 + 8 * 8); + DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 + 8 * 8 + 8 * 8); MACROBLOCKD *mbd = &cpi->mb.e_mbd; YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; - unsigned char *dst1, *dst2; - DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16 * 16 + 8 * 8 + 8 * 8); + uint8_t *dst1, *dst2; + DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8); // Save input state - unsigned char *y_buffer = mbd->pre.y_buffer; - unsigned char *u_buffer = mbd->pre.u_buffer; - unsigned char *v_buffer = mbd->pre.v_buffer; + uint8_t *y_buffer = mbd->pre.y_buffer; + uint8_t *u_buffer = mbd->pre.u_buffer; + uint8_t *v_buffer = mbd->pre.v_buffer; for (mb_row = 0; mb_row < mb_rows; mb_row++) { #if ALT_REF_MC_ENABLED @@ -266,7 +254,7 @@ static void temporal_filter_iterate_c int stride; vpx_memset(accumulator, 0, 384 * sizeof(unsigned int)); - vpx_memset(count, 0, 384 * sizeof(unsigned short)); + vpx_memset(count, 0, 384 * sizeof(uint16_t)); #if ALT_REF_MC_ENABLED cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); @@ -341,7 +329,7 @@ static void temporal_filter_iterate_c pval *= cpi->fixed_divide[count[k]]; pval >>= 19; - dst1[byte] = (unsigned char)pval; + dst1[byte] = (uint8_t)pval; // move to next pixel byte++; @@ -362,13 +350,13 @@ static void temporal_filter_iterate_c unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= cpi->fixed_divide[count[k]]; pval >>= 19; - dst1[byte] = (unsigned char)pval; + dst1[byte] = (uint8_t)pval; // V pval = accumulator[m] + (count[m] >> 1); pval *= cpi->fixed_divide[count[m]]; pval >>= 19; - dst2[byte] = (unsigned char)pval; + dst2[byte] = (uint8_t)pval; // move to next pixel byte++; diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h index abcb21926..27fc35f82 100644 --- a/vp9/encoder/vp9_temporal_filter.h +++ b/vp9/encoder/vp9_temporal_filter.h @@ -13,4 +13,4 @@ extern void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance); -#endif +#endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 2f907801b..dffd294dd 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_ENCODER_VP9_TOKENIZE_H_ #define VP9_ENCODER_VP9_TOKENIZE_H_ @@ -18,15 +17,15 @@ void vp9_tokenize_initialize(); typedef struct { - short Token; - short Extra; + int16_t Token; + int16_t Extra; } TOKENVALUE; typedef struct { const vp9_prob *context_tree; - short Extra; - unsigned char Token; - unsigned char skip_eob_node; + int16_t Extra; + uint8_t Token; + uint8_t skip_eob_node; } TOKENEXTRA; typedef int64_t vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS] @@ -80,4 +79,4 @@ extern const int *vp9_dct_value_cost_ptr; */ extern const TOKENVALUE *vp9_dct_value_tokens_ptr; -#endif /* tokenize_h */ +#endif // VP9_ENCODER_VP9_TOKENIZE_H_ diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h index 5da4a17a9..4e0e5e12c 100644 --- a/vp9/encoder/vp9_treewriter.h +++ b/vp9/encoder/vp9_treewriter.h @@ -105,4 +105,4 @@ void vp9_cost_tokens(int *Costs, const vp9_prob *, vp9_tree); void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t); -#endif +#endif // VP9_ENCODER_VP9_TREEWRITER_H_ diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 87036c1b0..3f0af0855 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -8,54 +8,55 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_ENCODER_VP9_VARIANCE_H_ #define VP9_ENCODER_VP9_VARIANCE_H_ -typedef unsigned int(*vp9_sad_fn_t)(const unsigned char *src_ptr, +#include "vpx/vpx_integer.h" + +typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad); -typedef void (*vp9_copy32xn_fn_t)(const unsigned char *src_ptr, +typedef void (*vp9_copy32xn_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, int n); -typedef void (*vp9_sad_multi_fn_t)(const unsigned char *src_ptr, +typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -typedef void (*vp9_sad_multi1_fn_t)(const unsigned char *src_ptr, +typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned short *sad_array); -typedef void (*vp9_sad_multi_d_fn_t)(const unsigned char *src_ptr, +typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char ** ref_ptr, + const uint8_t ** ref_ptr, int ref_stride, unsigned int *sad_array); -typedef unsigned int (*vp9_variance_fn_t)(const unsigned char *src_ptr, +typedef unsigned int (*vp9_variance_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); -typedef unsigned int (*vp9_subpixvariance_fn_t)(const unsigned char *src_ptr, +typedef unsigned int (*vp9_subpixvariance_fn_t)(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int Refstride, unsigned int *sse); -typedef void (*vp9_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r, +typedef void (*vp9_ssimpf_fn_t)(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, @@ -63,9 +64,9 @@ typedef void (*vp9_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r, typedef unsigned int (*vp9_getmbss_fn_t)(const short *); -typedef 
unsigned int (*vp9_get16x16prederror_fn_t)(const unsigned char *src_ptr, +typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int ref_stride); typedef struct variance_vtable { @@ -81,4 +82,4 @@ typedef struct variance_vtable { vp9_copy32xn_fn_t copymem; } vp9_variance_fn_ptr_t; -#endif +#endif // VP9_ENCODER_VP9_VARIANCE_H_ diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index a91cb122c..ecb92572b 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -14,7 +14,7 @@ #include "vp9/common/vp9_subpelvar.h" #include "vpx/vpx_integer.h" -unsigned int vp9_get_mb_ss_c(const short *src_ptr) { +unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; for (i = 0; i < 256; i++) { @@ -24,12 +24,10 @@ unsigned int vp9_get_mb_ss_c(const short *src_ptr) { return sum; } - - #if CONFIG_SUPERBLOCKS -unsigned int vp9_variance32x32_c(const unsigned char *src_ptr, +unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -41,9 +39,9 @@ unsigned int vp9_variance32x32_c(const unsigned char *src_ptr, } #endif -unsigned int vp9_variance16x16_c(const unsigned char *src_ptr, +unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -54,9 +52,9 @@ unsigned int vp9_variance16x16_c(const unsigned char *src_ptr, return (var - (((unsigned int)avg * avg) >> 8)); } -unsigned int vp9_variance8x16_c(const unsigned char *src_ptr, +unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -67,9 +65,9 @@ unsigned int vp9_variance8x16_c(const unsigned char *src_ptr, return (var - (((unsigned int)avg * avg) >> 7)); } -unsigned int vp9_variance16x8_c(const unsigned char *src_ptr, +unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -81,9 +79,9 @@ unsigned int vp9_variance16x8_c(const unsigned char *src_ptr, } -unsigned int vp9_variance8x8_c(const unsigned char *src_ptr, +unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -94,9 +92,9 @@ unsigned int vp9_variance8x8_c(const unsigned char *src_ptr, return (var - (((unsigned int)avg * avg) >> 6)); } -unsigned int vp9_variance4x4_c(const unsigned char *src_ptr, +unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -108,9 +106,9 @@ unsigned int vp9_variance4x4_c(const unsigned char *src_ptr, } -unsigned int vp9_mse16x16_c(const unsigned char *src_ptr, +unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { unsigned int var; @@ -122,16 +120,16 @@ unsigned int vp9_mse16x16_c(const unsigned char *src_ptr, } -unsigned int vp9_sub_pixel_variance4x4_c(const unsigned char *src_ptr, +unsigned int 
vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; - unsigned short FData3[5 * 4]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *HFilter, *VFilter; + uint16_t FData3[5 * 4]; // Temp data bufffer used in filtering HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -146,16 +144,16 @@ unsigned int vp9_sub_pixel_variance4x4_c(const unsigned char *src_ptr, } -unsigned int vp9_sub_pixel_variance8x8_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[9 * 8]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; + uint16_t FData3[9 * 8]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *HFilter, *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -166,16 +164,16 @@ unsigned int vp9_sub_pixel_variance8x8_c(const unsigned char *src_ptr, return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } -unsigned int vp9_sub_pixel_variance16x16_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[17 * 16]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; + uint16_t FData3[17 * 16]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *HFilter, *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -187,16 +185,16 @@ unsigned int vp9_sub_pixel_variance16x16_c(const unsigned char *src_ptr, } #if CONFIG_SUPERBLOCKS -unsigned int vp9_sub_pixel_variance32x32_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[33 * 32]; // Temp data bufffer used in filtering - unsigned char temp2[36 * 32]; - const short *HFilter, *VFilter; + uint16_t FData3[33 * 32]; // Temp data bufffer used in filtering + uint8_t temp2[36 * 32]; + const int16_t *HFilter, *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -208,9 +206,9 @@ unsigned int vp9_sub_pixel_variance32x32_c(const unsigned char *src_ptr, } #endif -unsigned int vp9_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, +unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, @@ -218,9 +216,9 @@ unsigned int vp9_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, } #if CONFIG_SUPERBLOCKS -unsigned int vp9_variance_halfpixvar32x32_h_c(const unsigned char *src_ptr, +unsigned int vp9_variance_halfpixvar32x32_h_c(const 
uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, @@ -229,9 +227,9 @@ unsigned int vp9_variance_halfpixvar32x32_h_c(const unsigned char *src_ptr, #endif -unsigned int vp9_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, +unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, @@ -239,9 +237,9 @@ unsigned int vp9_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, } #if CONFIG_SUPERBLOCKS -unsigned int vp9_variance_halfpixvar32x32_v_c(const unsigned char *src_ptr, +unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, @@ -249,9 +247,9 @@ unsigned int vp9_variance_halfpixvar32x32_v_c(const unsigned char *src_ptr, } #endif -unsigned int vp9_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, +unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, @@ -259,9 +257,9 @@ unsigned int vp9_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, } #if CONFIG_SUPERBLOCKS -unsigned int vp9_variance_halfpixvar32x32_hv_c(const unsigned char *src_ptr, +unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, int recon_stride, unsigned int *sse) { return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, @@ -269,11 +267,11 @@ unsigned int vp9_variance_halfpixvar32x32_hv_c(const unsigned char *src_ptr, } #endif -unsigned int vp9_sub_pixel_mse16x16_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, @@ -283,11 +281,11 @@ unsigned int vp9_sub_pixel_mse16x16_c(const unsigned char *src_ptr, } #if CONFIG_SUPERBLOCKS -unsigned int vp9_sub_pixel_mse32x32_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, @@ -297,16 +295,16 @@ unsigned int vp9_sub_pixel_mse32x32_c(const unsigned char *src_ptr, } #endif -unsigned int vp9_sub_pixel_variance16x8_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[16 * 9]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; + uint16_t FData3[16 * 9]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *HFilter, 
*VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; @@ -317,16 +315,16 @@ unsigned int vp9_sub_pixel_variance16x8_c(const unsigned char *src_ptr, return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } -unsigned int vp9_sub_pixel_variance8x16_c(const unsigned char *src_ptr, +unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - const unsigned char *dst_ptr, + const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - unsigned short FData3[9 * 16]; // Temp data bufffer used in filtering - unsigned char temp2[20 * 16]; - const short *HFilter, *VFilter; + uint16_t FData3[9 * 16]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *HFilter, *VFilter; HFilter = vp9_bilinear_filters[xoffset]; VFilter = vp9_bilinear_filters[yoffset]; diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index ea1cc0970..5cf3adb83 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -30,7 +30,6 @@ VP9_COMMON_SRCS-yes += common/vp9_idctllm.c VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h VP9_COMMON_SRCS-yes += common/vp9_blockd.h VP9_COMMON_SRCS-yes += common/vp9_common.h -VP9_COMMON_SRCS-yes += common/vp9_common_types.h VP9_COMMON_SRCS-yes += common/vp9_entropy.h VP9_COMMON_SRCS-yes += common/vp9_entropymode.h VP9_COMMON_SRCS-yes += common/vp9_entropymv.h diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index c35ebedb8..321fdb9e0 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -15,7 +15,7 @@ #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" -#include "common/vp9_onyxd.h" +#include "decoder/vp9_onyxd.h" #include "decoder/vp9_onyxd_int.h" #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 6a8a1fc52..23be8f3dd 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -8,14 +8,15 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef YV12_CONFIG_H #define YV12_CONFIG_H + #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif +#include "vpx/vpx_integer.h" + #define VP8BORDERINPIXELS 32 #define VP9BORDERINPIXELS 64 #define VP9_INTERP_EXTEND 4 @@ -49,11 +50,11 @@ extern "C" int uv_stride; /* int uvinternal_width; */ - unsigned char *y_buffer; - unsigned char *u_buffer; - unsigned char *v_buffer; + uint8_t *y_buffer; + uint8_t *u_buffer; + uint8_t *v_buffer; - unsigned char *buffer_alloc; + uint8_t *buffer_alloc; int border; int frame_size; YUV_TYPE clrtype; @@ -62,12 +63,12 @@ extern "C" int flags; } YV12_BUFFER_CONFIG; - int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); + int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, + int width, int height, int border); int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); #ifdef __cplusplus } #endif - -#endif /*YV12_CONFIG_H*/ +#endif // YV12_CONFIG_H From 05ec800ea48f5666e2360d67d2e71faff6c0b992 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Fri, 16 Nov 2012 10:48:23 -0800 Subject: [PATCH 31/77] Use boolcoder API instead of inlining This patch changes the token packing to call the bool encoder API rather than inlining it into the token packing function, and similarly removes a special get_signed case from the detokenizer. This allows easier experimentation with changing the bool coder as a whole. 
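For reference, the interval-split arithmetic being factored out here is the standard bool-coder step. A minimal sketch (illustrative only: the toy_* names are invented for this note, and the renormalization/carry handling that encode_bool() also hides is omitted):

    typedef struct {
      unsigned int lowvalue;  /* bottom of the current coding interval */
      unsigned int range;     /* interval width, renormalized to 128..255 */
    } toy_bool_writer;

    /* Encode one bit whose probability of being 0 is prob/256. */
    static void toy_encode_bool(toy_bool_writer *w, int bit, int prob) {
      /* Same split as the inlined code removed below. */
      const unsigned int split = 1 + (((w->range - 1) * prob) >> 8);

      if (bit) {
        w->lowvalue += split;  /* keep the upper sub-interval */
        w->range -= split;
      } else {
        w->range = split;      /* keep the lower sub-interval */
      }
      /* Renormalization (emitting bytes, propagating carries through any
       * 0xff bytes) is omitted; that is exactly the boilerplate this patch
       * deletes from pack_mb_tokens(). */
    }

The decoder-side get_signed() simplifies the same way: with the probability fixed at 128 the split is just half the range, which is why it collapses to a single decode_bool(br, 128) call.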
Change-Id: I52c3625bbe4960b68cfb873b0e39ade0c82f9e91 --- vp9/decoder/vp9_detokenize.c | 21 +------ vp9/encoder/vp9_bitstream.c | 116 +---------------------------------- 2 files changed, 4 insertions(+), 133 deletions(-) diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 8fe218494..55ce0171a 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -60,26 +60,7 @@ static const vp9_prob cat6_prob[15] = { DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]); static int get_signed(BOOL_DECODER *br, int value_to_sign) { - const int split = (br->range + 1) >> 1; - const VP9_BD_VALUE bigsplit = (VP9_BD_VALUE)split << (VP9_BD_VALUE_SIZE - 8); - int v; - - if (br->count < 0) - vp9_bool_decoder_fill(br); - - if (br->value < bigsplit) { - br->range = split; - v = value_to_sign; - } else { - br->range = br->range - split; - br->value = br->value - bigsplit; - v = -value_to_sign; - } - br->range += br->range; - br->value += br->value; - --br->count; - - return v; + return decode_bool(br, 128) ? -value_to_sign : value_to_sign; } #define INCREMENT_COUNT(token) \ diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 498f64752..54d44faec 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -364,11 +364,6 @@ static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd, static void pack_mb_tokens(vp9_writer* const bc, TOKENEXTRA **tp, const TOKENEXTRA *const stop) { - unsigned int split; - unsigned int shift; - int count = bc->count; - unsigned int range = bc->range; - unsigned int lowvalue = bc->lowvalue; TOKENEXTRA *p = *tp; while (p < stop) { @@ -394,42 +389,8 @@ static void pack_mb_tokens(vp9_writer* const bc, do { const int bb = (v >> --n) & 1; - split = 1 + (((range - 1) * pp[i >> 1]) >> 8); + encode_bool(bc, bb, pp[i >> 1]); i = vp9_coef_tree[i + bb]; - - if (bb) { - lowvalue += split; - range = range - split; - } else { - range = split; - } - - shift = vp9_norm[range]; - range <<= shift; - count += shift; - - if (count >= 0) { - int offset = shift - count; - - if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = bc->pos - 1; - - while (x >= 0 && bc->buffer[x] == 0xff) { - bc->buffer[x] = (unsigned char)0; - x--; - } - - bc->buffer[x] += 1; - } - - bc->buffer[bc->pos++] = (lowvalue >> (24 - offset)); - lowvalue <<= offset; - shift = count; - lowvalue &= 0xffffff; - count -= 8; - } - - lowvalue <<= shift; } while (n); @@ -444,87 +405,16 @@ static void pack_mb_tokens(vp9_writer* const bc, do { const int bb = (v >> --n) & 1; - split = 1 + (((range - 1) * pp[i >> 1]) >> 8); + encode_bool(bc, bb, pp[i >> 1]); i = b->tree[i + bb]; - - if (bb) { - lowvalue += split; - range = range - split; - } else { - range = split; - } - - shift = vp9_norm[range]; - range <<= shift; - count += shift; - - if (count >= 0) { - int offset = shift - count; - - if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = bc->pos - 1; - - while (x >= 0 && bc->buffer[x] == 0xff) { - bc->buffer[x] = (unsigned char)0; - x--; - } - - bc->buffer[x] += 1; - } - - bc->buffer[bc->pos++] = (lowvalue >> (24 - offset)); - lowvalue <<= offset; - shift = count; - lowvalue &= 0xffffff; - count -= 8; - } - - lowvalue <<= shift; } while (n); } - - { - - split = (range + 1) >> 1; - - if (e & 1) { - lowvalue += split; - range = range - split; - } else { - range = split; - } - - range <<= 1; - - if ((lowvalue & 0x80000000)) { - int x = bc->pos - 1; - - while (x >= 0 && bc->buffer[x] == 0xff) { - bc->buffer[x] = (unsigned char)0; - 
x--; - } - - bc->buffer[x] += 1; - - } - - lowvalue <<= 1; - - if (!++count) { - count = -8; - bc->buffer[bc->pos++] = (lowvalue >> 24); - lowvalue &= 0xffffff; - } - } - + encode_bool(bc, e & 1, 128); } ++p; } - bc->count = count; - bc->lowvalue = lowvalue; - bc->range = range; *tp = p; } From de52948665bf83537dd29b8697bed0dfa5390daf Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Wed, 19 Dec 2012 13:44:32 -0800 Subject: [PATCH 32/77] make: fix dependency generation Remove an extra level of escaping around the $@ variable to get valid output. Prior to this change, modifying header files did not trigger a rebuild of sources dependent on them. Change-Id: I93ecc60371b705b64dc8a2583a5d31126fe3f851 --- build/make/configure.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/make/configure.sh b/build/make/configure.sh index d9b0fe7cd..72627377c 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -435,10 +435,10 @@ RTCD_OPTIONS = ${RTCD_OPTIONS} EOF if enabled rvct; then cat >> $1 << EOF -fmt_deps = sed -e 's;^__image.axf;\$\${@:.d=.o} \$\$@;' #hide +fmt_deps = sed -e 's;^__image.axf;\${@:.d=.o} \$@;' #hide EOF else cat >> $1 << EOF -fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\$\${@:.d=.o} \$\$@;' +fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\${@:.d=.o} \$@;' EOF fi From 08f0c7cc9c1e285c33711384834e7a3baf26bd7c Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 27 Nov 2012 15:51:06 -0800 Subject: [PATCH 33/77] New previous coef context experiment Adds an experiment to derive the previous context of a coefficient not just from the previous coefficient in the scan order but from a combination of several neighboring coefficients previously encountered in scan order. A precomputed table of neighbors for each location for each scan type and block size is used. Currently 5 neighbors are used. Results are about 0.2% positive using a strategy where the max coef magnitude from the 5 neighbors is used to derive the context.
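The context derivation itself is compact. Paraphrased from the vp9_get_coef_neighbor_context() added below (assuming the neighbor table has already been built and unavailable positions marked -1):

    /* Bucket the max absolute value among the usable, previously coded
     * neighbors into one of the PREV_COEF_CONTEXTS (4) contexts. */
    static int neighbor_context(int max_abs, int num_usable) {
      if (num_usable == 0)
        return 0;        /* no neighbor available */
      if (max_abs <= 1)
        return max_abs;  /* 0 or 1 */
      if (max_abs < 4)
        return 2;        /* small magnitude */
      return 3;          /* large magnitude */
    }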
Change-Id: Ie708b54d8e1898af742846ce2d1e2b0d89fd4ad5 --- configure | 1 + vp9/common/vp9_alloccommon.c | 4 + vp9/common/vp9_entropy.c | 174 ++++++++++++++++++++++++++++++++--- vp9/common/vp9_entropy.h | 31 ++++++- vp9/decoder/vp9_detokenize.c | 34 +++++-- vp9/encoder/vp9_encodemb.c | 33 +++++++ vp9/encoder/vp9_rdopt.c | 54 +++++++++-- vp9/encoder/vp9_tokenize.c | 26 +++++- 8 files changed, 325 insertions(+), 32 deletions(-) diff --git a/configure b/configure index 55add837c..1126ea86e 100755 --- a/configure +++ b/configure @@ -250,6 +250,7 @@ EXPERIMENT_LIST=" tx32x32 dwt32x32hybrid cnvcontext + newcoefcontext " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 08882b3a6..bd86db8a8 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -220,4 +220,8 @@ void vp9_initialize_common() { vp9_entropy_mode_init(); vp9_entropy_mv_init(); + +#if CONFIG_NEWCOEFCONTEXT + vp9_init_neighbors(); +#endif } diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 559757e81..1402c084e 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -62,6 +62,7 @@ DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]) = { 2, 6, 10, 14, 3, 7, 11, 15 }; + DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 0, 1, 2, 3, 4, 5, 6, 7, @@ -69,16 +70,16 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 12, 13, 14, 15 }; - DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, - 5, 3, 6, 3, 5, 4, 6, 6, - 6, 5, 5, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7 - }; + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 +}; + DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, @@ -118,17 +119,17 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131, 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, - 42, 27, 12, 13, 28, 43, 58, 73, + 42, 27, 12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208, 224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14, - 15, 30, 45, 60, 75, 90, 105, 120, + 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46, 31, 47, - 62, 77, 92, 107, 122, 137, 152, 167, + 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242, 243, 228, 213, - 198, 183, 168, 153, 138, 123, 108, 93, + 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185, 170, 155, 140, 125, 110, @@ -530,6 +531,152 @@ vp9_extra_bit_struct vp9_extra_bits[12] = { #include "vp9/common/vp9_default_coef_probs.h" +#if CONFIG_NEWCOEFCONTEXT + +// Neighborhood 5-tuples for various scans and blocksizes, +// in {top, left, topleft, topright, bottomleft} order +// for each position in raster scan order. +// -1 indicates the neighbor does not exist. 
+DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_4x4_neighbors[16 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); +DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +DECLARE_ALIGNED(16, int, + vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); +#endif + +static int find_in_scan(const int *scan, int l, int m) { + int i, l2 = l * l; + for (i = 0; i < l2; ++i) { + if (scan[i] == m) + return i; + } + return -1; +} + +static void init_scan_neighbors(const int *scan, int l, int *neighbors) { + int l2 = l * l; + int m, n, i, j, k; + for (n = 0; n < l2; ++n) { + int locn = find_in_scan(scan, l, n); + int z = -1; + i = n / l; + j = n % l; + for (k = 0; k < MAX_NEIGHBORS; ++k) + neighbors[MAX_NEIGHBORS * n + k] = -1; + if (i - 1 >= 0) { + m = (i - 1) * l + j; + if (find_in_scan(scan, l, m) < locn) { + neighbors[MAX_NEIGHBORS * n] = m; + if (m == 0) z = 0; + } + } + if (j - 1 >= 0) { + m = i * l + j - 1; + if (find_in_scan(scan, l, m) < locn) { + neighbors[MAX_NEIGHBORS * n + 1] = m; + if (m == 0) z = 1; + } + } + if (i - 1 >= 0 && j - 1 >= 0) { + m = (i - 1) * l + j - 1; + if (find_in_scan(scan, l, m) < locn) { + neighbors[MAX_NEIGHBORS * n + 2] = m; + if (m == 0) z = 2; + } + } + if (i - 1 >= 0 && j + 1 < l) { + m = (i - 1) * l + j + 1; + if (find_in_scan(scan, l, m) < locn) { + neighbors[MAX_NEIGHBORS * n + 3] = m; + if (m == 0) z = 3; + } + } + if (i + 1 < l && j - 1 >= 0) { + m = (i + 1) * l + j - 1; + if (find_in_scan(scan, l, m) < locn) { + neighbors[MAX_NEIGHBORS * n + 4] = m; + if (m == 0) z = 4; + } + } + if (z != -1) { // zero exists + int v = 0; + for (k = 0; k < MAX_NEIGHBORS; ++k) + v += (neighbors[MAX_NEIGHBORS * n + k] > 0); + if (v) { + neighbors[MAX_NEIGHBORS * n + z] = -1; + } + } + } +} + +void vp9_init_neighbors() { + init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4, + vp9_default_zig_zag1d_4x4_neighbors); + init_scan_neighbors(vp9_row_scan_4x4, 4, + vp9_row_scan_4x4_neighbors); + init_scan_neighbors(vp9_col_scan_4x4, 4, + vp9_col_scan_4x4_neighbors); + init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8, + vp9_default_zig_zag1d_8x8_neighbors); + init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, + vp9_default_zig_zag1d_16x16_neighbors); +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, + vp9_default_zig_zag1d_32x32_neighbors); +#endif +} + +const int *vp9_get_coef_neighbors_handle(const int *scan) { + if (scan == vp9_default_zig_zag1d_4x4) { + return vp9_default_zig_zag1d_4x4_neighbors; + } else if (scan == vp9_row_scan_4x4) { + return vp9_row_scan_4x4_neighbors; + } else if (scan == vp9_col_scan_4x4) { + return vp9_col_scan_4x4_neighbors; + } else if (scan == vp9_default_zig_zag1d_8x8) { + return vp9_default_zig_zag1d_8x8_neighbors; + } else if (scan == vp9_default_zig_zag1d_16x16) { + return vp9_default_zig_zag1d_16x16_neighbors; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + } else if (scan == vp9_default_zig_zag1d_32x32) { + return vp9_default_zig_zag1d_32x32_neighbors; +#endif + } + return vp9_default_zig_zag1d_4x4_neighbors; +} + +int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc, + const int *neigbor_handle, int rc) { + static int neighbors_used = MAX_NEIGHBORS; // maximum is MAX_NEIGHBORS + const int *nb = 
neigbor_handle + rc * MAX_NEIGHBORS; + int i, v, val = 0, n = 0; + for (i = 0; i < neighbors_used; ++i) { + if (nb[i] == -1 || (nb[i] == 0 && nodc)) { + continue; + } + v = abs(qcoeff_ptr[nb[i]]); + val = (v > val ? v : val); + n++; + } + if (n == 0) + return 0; + else if (val <= 1) + return val; + else if (val < 4) + return 2; + else + return 3; +} +#endif /* CONFIG_NEWCOEFCONTEXT */ + void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4, sizeof(pc->fc.coef_probs_4x4)); @@ -546,7 +693,6 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, default_hybrid_coef_probs_16x16, sizeof(pc->fc.hybrid_coef_probs_16x16)); - #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32, sizeof(pc->fc.coef_probs_32x32)); diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index eb8cfe93c..99826015a 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -98,7 +98,7 @@ extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]); distinct bands). */ /*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ -#define PREV_COEF_CONTEXTS 4 +#define PREV_COEF_CONTEXTS 4 typedef unsigned int vp9_coeff_count[COEF_BANDS][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; @@ -127,13 +127,36 @@ extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); #endif void vp9_coef_tree_initialize(void); - void vp9_adapt_coef_probs(struct VP9Common *); static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { /* Clear entropy contexts */ - vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } +#if CONFIG_NEWCOEFCONTEXT + +#define MAX_NEIGHBORS 5 +#define NEWCOEFCONTEXT_BAND_COND(b) ((b) >= 1) +void vp9_init_neighbors(void); + +const int *vp9_get_coef_neighbors_handle(const int *scan); +int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc, + const int *neigbor_handle, int rc); +extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_4x4_neighbors[ + 16 * MAX_NEIGHBORS]); +extern DECLARE_ALIGNED(16, int, vp9_row_scan_4x4_neighbors[ + 16 * MAX_NEIGHBORS]); +extern DECLARE_ALIGNED(16, int, vp9_col_scan_4x4_neighbors[ + 16 * MAX_NEIGHBORS]); +extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[ + 64 * MAX_NEIGHBORS]); +extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[ + 256 * MAX_NEIGHBORS]); +#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[ + 1024 * MAX_NEIGHBORS]); +#endif +#endif // CONFIG_NEWCOEFCONTEXT #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 55ce0171a..bdf578b3e 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -63,11 +63,24 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { return decode_bool(br, 128) ? 
-value_to_sign : value_to_sign; } -#define INCREMENT_COUNT(token) \ - do { \ +#if CONFIG_NEWCOEFCONTEXT +#define PT pn +#define INCREMENT_COUNT(token) \ + do { \ + coef_counts[type][coef_bands[c]][pn][token]++; \ + pn = pt = vp9_prev_token_class[token]; \ + if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(coef_bands[c + 1])) \ + pn = vp9_get_coef_neighbor_context( \ + qcoeff_ptr, nodc, neighbors, scan[c + 1]); \ + } while (0) +#else +#define PT pt +#define INCREMENT_COUNT(token) \ + do { \ coef_counts[type][coef_bands[c]][pt][token]++; \ pt = vp9_prev_token_class[token]; \ } while (0) +#endif /* CONFIG_NEWCOEFCONTEXT */ #define WRITE_COEF_CONTINUE(val, token) \ { \ @@ -92,7 +105,12 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, const int *const scan, TX_SIZE txfm_size, const int *coef_bands) { FRAME_CONTEXT *const fc = &dx->common.fc; - int pt, c = (type == PLANE_TYPE_Y_NO_DC); +#if CONFIG_NEWCOEFCONTEXT + const int *neighbors; + int pn; +#endif + int nodc = (type == PLANE_TYPE_Y_NO_DC); + int pt, c = nodc; vp9_coeff_probs *coef_probs; vp9_prob *prob; vp9_coeff_count *coef_counts; @@ -135,11 +153,15 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, } VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); +#if CONFIG_NEWCOEFCONTEXT + pn = pt; + neighbors = vp9_get_coef_neighbors_handle(scan); +#endif while (1) { int val; const uint8_t *cat6 = cat6_prob; if (c >= seg_eob) break; - prob = coef_probs[type][coef_bands[c]][pt]; + prob = coef_probs[type][coef_bands[c]][PT]; if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; SKIP_START: @@ -147,7 +169,7 @@ SKIP_START: if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; - prob = coef_probs[type][coef_bands[c]][pt]; + prob = coef_probs[type][coef_bands[c]][PT]; goto SKIP_START; } // ONE_CONTEXT_NODE_0_ @@ -211,7 +233,7 @@ SKIP_START: } if (c < seg_eob) - coef_counts[type][coef_bands[c]][pt][DCT_EOB_TOKEN]++; + coef_counts[type][coef_bands[c]][PT][DCT_EOB_TOKEN]++; a[0] = l[0] = (c > !type); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 66ec31c83..91eea4e51 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -380,6 +380,9 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, int err_mult = plane_rd_mult[type]; int default_eob; int const *scan, *bands; +#if CONFIG_NEWCOEFCONTEXT + const int *neighbors; +#endif switch (tx_size) { default: @@ -421,6 +424,9 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, default_eob = 256; break; } +#if CONFIG_NEWCOEFCONTEXT + neighbors = vp9_get_coef_neighbors_handle(scan); +#endif /* Now set up a Viterbi trellis to evaluate alternative roundings. 
*/ rdmult = mb->rdmult * err_mult; @@ -454,6 +460,11 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, if (next < default_eob) { band = bands[i + 1]; pt = vp9_prev_token_class[t0]; +#if CONFIG_NEWCOEFCONTEXT + if (NEWCOEFCONTEXT_BAND_COND(band)) + pt = vp9_get_coef_neighbor_context( + qcoeff_ptr, i0, neighbors, scan[i + 1]); +#endif rate0 += mb->token_costs[tx_size][type][band][pt][tokens[next][0].token]; rate1 += @@ -501,12 +512,34 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, if (next < default_eob) { band = bands[i + 1]; if (t0 != DCT_EOB_TOKEN) { +#if CONFIG_NEWCOEFCONTEXT + int tmp = qcoeff_ptr[scan[i]]; + qcoeff_ptr[scan[i]] = x; + if (NEWCOEFCONTEXT_BAND_COND(band)) + pt = vp9_get_coef_neighbor_context( + qcoeff_ptr, i0, neighbors, scan[i + 1]); + else + pt = vp9_prev_token_class[t0]; + qcoeff_ptr[scan[i]] = tmp; +#else pt = vp9_prev_token_class[t0]; +#endif rate0 += mb->token_costs[tx_size][type][band][pt][ tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { +#if CONFIG_NEWCOEFCONTEXT + int tmp = qcoeff_ptr[scan[i]]; + qcoeff_ptr[scan[i]] = x; + if (NEWCOEFCONTEXT_BAND_COND(band)) + pt = vp9_get_coef_neighbor_context( + qcoeff_ptr, i0, neighbors, scan[i + 1]); + else + pt = vp9_prev_token_class[t1]; + qcoeff_ptr[scan[i]] = tmp; +#else pt = vp9_prev_token_class[t1]; +#endif rate1 += mb->token_costs[tx_size][type][band][pt][ tokens[next][1].token]; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3f9baef75..69bc892bb 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -513,29 +513,48 @@ int vp9_uvsse(MACROBLOCK *x) { } +#if CONFIG_NEWCOEFCONTEXT +#define PT pn +#else +#define PT pt +#endif + static int cost_coeffs_2x2(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */ + int nodc = (type == PLANE_TYPE_Y_NO_DC); + int c = nodc; /* start at coef 0, unless Y with Y2 */ int eob = b->eob; int pt; /* surrounding block/prev coef predictor */ int cost = 0; int16_t *qcoeff_ptr = b->qcoeff; +#if CONFIG_NEWCOEFCONTEXT + const int *neighbors = vp9_default_zig_zag1d_4x4_neighbors; + int pn; +#endif VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); assert(eob <= 4); +#if CONFIG_NEWCOEFCONTEXT + pn = pt; +#endif for (; c < eob; c++) { int v = qcoeff_ptr[vp9_default_zig_zag1d_4x4[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; cost += mb->token_costs[TX_8X8][type][vp9_coef_bands_4x4[c]][pt][t]; cost += vp9_dct_value_cost_ptr[v]; pt = vp9_prev_token_class[t]; +#if CONFIG_NEWCOEFCONTEXT + if (c < 4 - 1) + pn = vp9_get_coef_neighbor_context( + qcoeff_ptr, nodc, neighbors, vp9_default_zig_zag1d_4x4[c + 1]); +#endif } if (c < 4) cost += mb->token_costs[TX_8X8][type][vp9_coef_bands_4x4[c]] - [pt] [DCT_EOB_TOKEN]; + [PT][DCT_EOB_TOKEN]; // is eob first coefficient; pt = (c > !type); *a = *l = pt; @@ -546,7 +565,8 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, TX_SIZE tx_size) { const int eob = b->eob; - int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */ + int nodc = (type == PLANE_TYPE_Y_NO_DC); + int c = nodc; /* start at coef 0, unless Y with Y2 */ int cost = 0, default_eob, seg_eob; int pt; /* surrounding block/prev coef predictor */ int const *scan, *band; @@ -555,6 +575,10 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi; TX_TYPE tx_type = DCT_DCT; int 
segment_id = mbmi->segment_id; +#if CONFIG_NEWCOEFCONTEXT + const int *neighbors; + int pn; +#endif scan = vp9_default_zig_zag1d_4x4; band = vp9_coef_bands_4x4; default_eob = 16; @@ -628,17 +652,28 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); +#if CONFIG_NEWCOEFCONTEXT + neighbors = vp9_get_coef_neighbors_handle(scan); + pn = pt; +#endif if (tx_type != DCT_DCT) { for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; - cost += mb->hybrid_token_costs[tx_size][type][band[c]][pt][t]; + cost += mb->hybrid_token_costs[tx_size][type][band[c]][PT][t]; cost += vp9_dct_value_cost_ptr[v]; pt = vp9_prev_token_class[t]; +#if CONFIG_NEWCOEFCONTEXT + if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) + pn = vp9_get_coef_neighbor_context( + qcoeff_ptr, nodc, neighbors, scan[c + 1]); + else + pn = pt; +#endif } if (c < seg_eob) cost += mb->hybrid_token_costs[tx_size][type][band[c]] - [pt][DCT_EOB_TOKEN]; + [PT][DCT_EOB_TOKEN]; } else { for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; @@ -646,10 +681,17 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, cost += mb->token_costs[tx_size][type][band[c]][pt][t]; cost += vp9_dct_value_cost_ptr[v]; pt = vp9_prev_token_class[t]; +#if CONFIG_NEWCOEFCONTEXT + if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) + pn = vp9_get_coef_neighbor_context( + qcoeff_ptr, nodc, neighbors, scan[c + 1]); + else + pn = pt; +#endif } if (c < seg_eob) cost += mb->token_costs[tx_size][type][band[c]] - [pt][DCT_EOB_TOKEN]; + [PT][DCT_EOB_TOKEN]; } // is eob first coefficient; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index b0b5e6d5f..58bb251f5 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -103,6 +103,13 @@ static void fill_value_tokens() { vp9_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE; vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } + +#if CONFIG_NEWCOEFCONTEXT +#define PT pn +#else +#define PT pt +#endif + static void tokenize_b(VP9_COMP *cpi, MACROBLOCKD *xd, const int ib, @@ -123,6 +130,10 @@ static void tokenize_b(VP9_COMP *cpi, vp9_coeff_probs *probs; const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
get_tx_type(xd, b) : DCT_DCT; +#if CONFIG_NEWCOEFCONTEXT + const int *neighbors; + int pn; +#endif ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; @@ -229,6 +240,10 @@ static void tokenize_b(VP9_COMP *cpi, } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); +#if CONFIG_NEWCOEFCONTEXT + neighbors = vp9_get_coef_neighbors_handle(scan); + pn = pt; +#endif if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); @@ -249,14 +264,21 @@ static void tokenize_b(VP9_COMP *cpi, } t->Token = token; - t->context_tree = probs[type][band][pt]; + t->context_tree = probs[type][band][PT]; t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) || (band > 1 && type == PLANE_TYPE_Y_NO_DC)); assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { - ++counts[type][band][pt][token]; + ++counts[type][band][PT][token]; } pt = vp9_prev_token_class[token]; +#if CONFIG_NEWCOEFCONTEXT + if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(bands[c + 1])) + pn = vp9_get_coef_neighbor_context( + qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); + else + pn = pt; +#endif ++t; } while (c < eob && ++c < seg_eob); From 9a7023d2ad82ba6905ed51adfc9821bea2fba26a Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Thu, 20 Dec 2012 15:39:43 -0800 Subject: [PATCH 34/77] Fix MSVS build for removed vp9/common/vp9_onyxd.h Change-Id: I75ad0b4ca5b53b5bf759cc26a484ec196d275279 --- vp9/vp9_common.mk | 1 - vp9/vp9dx.mk | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 5cf3adb83..4d17233e7 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -12,7 +12,6 @@ VP9_COMMON_SRCS-yes += vp9_common.mk VP9_COMMON_SRCS-yes += common/vp9_pragmas.h VP9_COMMON_SRCS-yes += common/vp9_ppflags.h VP9_COMMON_SRCS-yes += common/vp9_onyx.h -VP9_COMMON_SRCS-yes += common/vp9_onyxd.h VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_asm_com_offsets.c VP9_COMMON_SRCS-yes += common/vp9_blockd.c diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 004039016..e03e63cd4 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -27,6 +27,7 @@ VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h VP9_DX_SRCS-yes += decoder/vp9_decodemv.h VP9_DX_SRCS-yes += decoder/vp9_dequantize.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h +VP9_DX_SRCS-yes += decoder/vp9_onyxd.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h VP9_DX_SRCS-yes += decoder/vp9_treereader.h VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c From 89ac94f8fb7be1ce9baee198954a890941ecf936 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Fri, 21 Dec 2012 14:41:49 -0800 Subject: [PATCH 35/77] Removed mmx versions of vp9_bilinear_predict filters These filters will not work with VP9. 
Change-Id: Ic26c77961084fcea6bfa97f4cd95afdea2282e85 --- vp9/common/vp9_rtcd_defs.sh | 8 +- vp9/common/x86/vp9_asm_stubs.c | 24 -- vp9/common/x86/vp9_subpixel_mmx.asm | 459 ---------------------------- vp9/common/x86/vp9_subpixel_x86.h | 13 - 4 files changed, 4 insertions(+), 500 deletions(-) diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 44781cb8a..9cf7121ba 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -319,10 +319,10 @@ prototype void vp9_sixtap_predict_avg "uint8_t *src_ptr, int src_pixels_per_lin specialize vp9_sixtap_predict_avg prototype void vp9_bilinear_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" -specialize vp9_bilinear_predict16x16 mmx sse2 +specialize vp9_bilinear_predict16x16 sse2 prototype void vp9_bilinear_predict8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" -specialize vp9_bilinear_predict8x8 mmx sse2 +specialize vp9_bilinear_predict8x8 sse2 prototype void vp9_bilinear_predict_avg16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" specialize vp9_bilinear_predict_avg16x16 @@ -331,10 +331,10 @@ prototype void vp9_bilinear_predict_avg8x8 "uint8_t *src_ptr, int src_pixels_pe specialize vp9_bilinear_predict_avg8x8 prototype void vp9_bilinear_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" -specialize vp9_bilinear_predict8x4 mmx +specialize vp9_bilinear_predict8x4 prototype void vp9_bilinear_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" -specialize vp9_bilinear_predict4x4 mmx +specialize vp9_bilinear_predict4x4 prototype void vp9_bilinear_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch" specialize vp9_bilinear_predict_avg4x4 diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c index e53ede066..0d268a264 100644 --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@ -15,8 +15,6 @@ extern const short vp9_six_tap_mmx[16][6 * 8]; -extern const short vp9_bilinear_filters_8x_mmx[16][2 * 8]; - extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, @@ -95,8 +93,6 @@ extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr, unsigned int output_height, const short *vp9_filter); -extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx); - /////////////////////////////////////////////////////////////////////////// // the mmx function that does the bilinear filtering and var calculation // // int one pass // @@ -232,26 +228,6 @@ void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr, vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch, 16, 8, 4, 8, vfilter); } - -void vp9_bilinear_predict16x16_mmx(unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - vp9_bilinear_predict8x8_mmx(src_ptr, - src_pixels_per_line, xoffset, yoffset, - dst_ptr, dst_pitch); - vp9_bilinear_predict8x8_mmx(src_ptr + 8, - src_pixels_per_line, xoffset, yoffset, - dst_ptr + 8, dst_pitch); - vp9_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line, - src_pixels_per_line, xoffset, yoffset, - dst_ptr + dst_pitch * 8, dst_pitch); - 
vp9_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8, - src_pixels_per_line, xoffset, yoffset, - dst_ptr + dst_pitch * 8 + 8, dst_pitch); -} #endif #if HAVE_SSE2 diff --git a/vp9/common/x86/vp9_subpixel_mmx.asm b/vp9/common/x86/vp9_subpixel_mmx.asm index 2f757fa80..58d92bf05 100644 --- a/vp9/common/x86/vp9_subpixel_mmx.asm +++ b/vp9/common/x86/vp9_subpixel_mmx.asm @@ -202,438 +202,6 @@ sym(vp9_filter_block1dc_v6_mmx): pop rbp ret - -;void bilinear_predict8x8_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp9_bilinear_predict8x8_mmx) -sym(vp9_bilinear_predict8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = bilinear_filters_mmx[xoffset]; - ;const short *VFilter = bilinear_filters_mmx[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - shl rax, 5 ; offset * 32 - lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] - - add rax, rcx ; HFilter - mov rsi, arg(0) ;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - - shl rax, 5 ; offset*32 - add rax, rcx ; VFilter - - lea rcx, [rdi+rdx*8] ; - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP9_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x8: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP9_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, [rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP9_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 ;dst_pitch -%endif - cmp rdi, rcx ; - jne .next_row_8x8 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict8x4_mmx -;( -; unsigned char *src_ptr, -; int 
src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp9_bilinear_predict8x4_mmx) -sym(vp9_bilinear_predict8x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = bilinear_filters_mmx[xoffset]; - ;const short *VFilter = bilinear_filters_mmx[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] - shl rax, 5 - - mov rsi, arg(0) ;src_ptr ; - add rax, rcx - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP9_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x4: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP9_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, [rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP9_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 -%endif - cmp rdi, rcx ; - jne .next_row_8x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict4x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp9_bilinear_predict4x4_mmx) -sym(vp9_bilinear_predict4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = bilinear_filters_mmx[xoffset]; - ;const short *VFilter = bilinear_filters_mmx[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp9_bilinear_filters_8x_mmx))] - shl rax, 5 - - add rax, rcx ; HFilter - mov rsi, arg(0) 
;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;ldst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - movq mm7, mm3 ; - packuswb mm7, mm0 ; - - add rsi, rdx ; next line -.next_row_4x4: - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - - movq mm5, mm7 ; - punpcklbw mm5, mm0 ; - - pmullw mm5, [rax] ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - movq mm7, mm3 ; - - packuswb mm7, mm0 ; - - pmullw mm3, [rax+16] ; - paddw mm3, mm5 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP9_FILTER_SHIFT ; xmm3 /= 128 - - packuswb mm3, mm0 - movd [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch ; - add rsi, rdx ; next line - add rdi, r8 -%endif - - cmp rdi, rcx ; - jne .next_row_4x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - SECTION_RODATA align 16 rd: @@ -698,30 +266,3 @@ sym(vp9_six_tap_mmx): times 8 dw -6 times 8 dw 0 - -align 16 -global HIDDEN_DATA(sym(vp9_bilinear_filters_8x_mmx)) -sym(vp9_bilinear_filters_8x_mmx): - times 8 dw 128 - times 8 dw 0 - - times 8 dw 112 - times 8 dw 16 - - times 8 dw 96 - times 8 dw 32 - - times 8 dw 80 - times 8 dw 48 - - times 8 dw 64 - times 8 dw 64 - - times 8 dw 48 - times 8 dw 80 - - times 8 dw 32 - times 8 dw 96 - - times 8 dw 16 - times 8 dw 112 diff --git a/vp9/common/x86/vp9_subpixel_x86.h b/vp9/common/x86/vp9_subpixel_x86.h index 86b72f39a..25bc26d9b 100644 --- a/vp9/common/x86/vp9_subpixel_x86.h +++ b/vp9/common/x86/vp9_subpixel_x86.h @@ -25,10 +25,6 @@ extern prototype_subpixel_predict(vp9_sixtap_predict8x8_mmx); extern prototype_subpixel_predict(vp9_sixtap_predict8x4_mmx); extern prototype_subpixel_predict(vp9_sixtap_predict4x4_mmx); extern prototype_subpixel_predict(vp9_bilinear_predict16x16_mmx); -extern prototype_subpixel_predict(vp9_bilinear_predict8x8_mmx); -extern prototype_subpixel_predict(vp9_bilinear_predict8x4_mmx); -extern prototype_subpixel_predict(vp9_bilinear_predict4x4_mmx); - #if !CONFIG_RUNTIME_CPU_DETECT #undef vp9_subpix_sixtap16x16 @@ -46,15 +42,6 @@ extern prototype_subpixel_predict(vp9_bilinear_predict4x4_mmx); #undef vp9_subpix_bilinear16x16 #define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_mmx -#undef vp9_subpix_bilinear8x8 -#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_mmx - -#undef vp9_subpix_bilinear8x4 -#define vp9_subpix_bilinear8x4 vp9_bilinear_predict8x4_mmx - -#undef vp9_subpix_bilinear4x4 -#define vp9_subpix_bilinear4x4 vp9_bilinear_predict4x4_mmx - #endif #endif From cc80247f16ce83271e5c2043307dc65c8bb4bbf7 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Thu, 27 Dec 2012 13:48:17 -0800 Subject: [PATCH 36/77] Switch the order of calculating 2-D inverse 
transform The 2-D inverse transform X = M1*Z*Transposed_M2 was calculated in 2 steps from left to right: 1. Vertical transform: Y = M1*Z 2. Horizontal transform: X= Y*Transposed_M2 In SIMD, a transpose is needed in vertical transform. Here, switched the calculation order to do it from right to left. In this way, we could eliminate that transpose by writing the intermediate results out to their transposed positions. Change-Id: I34dfe5eb01292f6e363712420d99475e2e81e12c --- vp9/common/vp9_idctllm.c | 44 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 7ce8cbee1..6cbc25967 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -399,10 +399,10 @@ void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch, } /* Converted the transforms to integer form. */ -#define VERTICAL_SHIFT 14 // 16 -#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1) -#define HORIZONTAL_SHIFT 17 // 15 +#define HORIZONTAL_SHIFT 14 // 16 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1) +#define VERTICAL_SHIFT 17 // 15 +#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1) void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch, TX_TYPE tx_type, int tx_dim) { int i, j, k; @@ -444,41 +444,47 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch, break; } - /* vertical transformation */ + /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps + * from right to left: + * 1. horizontal transform: Y= Z*Transposed_M2 + * 2. vertical transform: X = M1*Y + * In SIMD, doing this way could eliminate the transpose needed if it is + * calculated from left to right. + */ + /* Horizontal transformation */ for (j = 0; j < tx_dim; j++) { for (i = 0; i < tx_dim; i++) { int temp = 0; for (k = 0; k < tx_dim; k++) { - temp += ptv[k] * ip[(k * tx_dim)]; + temp += ip[k] * pth[k]; } - im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT); - ip++; + /* Calculate im and store it in its transposed position. */ + im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT); + ip += tx_dim; } - im += tx_dim; // 16 - ptv += tx_dim; + im += tx_dim; + pth += tx_dim; ip = input; } - /* horizontal transformation */ + /* Vertical transformation */ im = &imbuf[0]; - for (j = 0; j < tx_dim; j++) { - const int16_t *pthc = pth; - - for (i = 0; i < tx_dim; i++) { + for (i = 0; i < tx_dim; i++) { + for (j = 0; j < tx_dim; j++) { int temp = 0; for (k = 0; k < tx_dim; k++) { - temp += im[k] * pthc[k]; + temp += ptv[k] * im[k]; } - op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT); - pthc += tx_dim; + op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT); + im += tx_dim; } - - im += tx_dim; // 16 + im = &imbuf[0]; + ptv += tx_dim; op += shortpitch; } } From 0f4de1573a1e762e18c11626674532d5baf4ceb1 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Thu, 27 Dec 2012 16:04:44 -0800 Subject: [PATCH 37/77] Skip finding best ref_mvs when the mode is ZEROMV Read mode before calling vp9_find_best_ref_mvs(). If the mode is ZEROMV, the best ref_mvs are not needed. Then, we can skip calling vp9_find_best_ref_mvs(). 
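Sketched with the decoder state and argument lists abridged (see the read_mb_modes_mv() hunks below for the real code), the reordering is:

    /* before: search first, then read the mode */
    vp9_find_best_ref_mvs(/* ... */);        /* always paid for */
    mbmi->mode = read_mv_ref(bc, mv_ref_p);

    /* after: read the mode first, search only when its result is used */
    mbmi->mode = read_mv_ref(bc, mv_ref_p);
    if (mbmi->mode != ZEROMV)
      vp9_find_best_ref_mvs(/* ... */);      /* skipped for ZEROMV blocks */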
Change-Id: I5baa3658dd3f1c7107211cbbbcf919b4584be2e2 --- vp9/decoder/vp9_decodemv.c | 63 ++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index f36a22409..30e5ef17a 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -797,16 +797,34 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, ref_frame, mbmi->ref_mvs[ref_frame], cm->ref_frame_sign_bias); - vp9_find_best_ref_mvs(xd, - xd->pre.y_buffer, - recon_y_stride, - mbmi->ref_mvs[ref_frame], - &nearest, &nearby); - vp9_mv_ref_probs(&pbi->common, mv_ref_p, mbmi->mb_mode_context[ref_frame]); - best_mv = mbmi->ref_mvs[ref_frame][0]; + // Is the segment level mode feature enabled for this segment + if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) { + mbmi->mode = + vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); + } else { +#if CONFIG_SUPERBLOCKS + if (mbmi->encoded_as_sb) + mbmi->mode = read_sb_mv_ref(bc, mv_ref_p); + else +#endif + mbmi->mode = read_mv_ref(bc, mv_ref_p); + + vp9_accum_mv_refs(&pbi->common, mbmi->mode, + mbmi->mb_mode_context[ref_frame]); + } + + if (mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, + xd->pre.y_buffer, + recon_y_stride, + mbmi->ref_mvs[ref_frame], + &nearest, &nearby); + + best_mv.as_int = (mbmi->ref_mvs[ref_frame][0]).as_int; + } #ifdef DEC_DEBUG if (dec_debug) @@ -816,21 +834,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, #endif } - // Is the segment level mode feature enabled for this segment - if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) { - mbmi->mode = - vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); - } else { -#if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { - mbmi->mode = read_sb_mv_ref(bc, mv_ref_p); - } else -#endif - mbmi->mode = read_mv_ref(bc, mv_ref_p); - - vp9_accum_mv_refs(&pbi->common, mbmi->mode, - mbmi->mb_mode_context[ref_frame]); - } #if CONFIG_PRED_FILTER if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV) { @@ -889,13 +892,15 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->ref_mvs[mbmi->second_ref_frame], cm->ref_frame_sign_bias); - vp9_find_best_ref_mvs(xd, - xd->second_pre.y_buffer, - recon_y_stride, - mbmi->ref_mvs[mbmi->second_ref_frame], - &nearest_second, - &nearby_second); - best_mv_second = mbmi->ref_mvs[mbmi->second_ref_frame][0]; + if (mbmi->mode != ZEROMV) { + vp9_find_best_ref_mvs(xd, + xd->second_pre.y_buffer, + recon_y_stride, + mbmi->ref_mvs[mbmi->second_ref_frame], + &nearest_second, + &nearby_second); + best_mv_second = mbmi->ref_mvs[mbmi->second_ref_frame][0]; + } } } else { From 313d1100afad5544fb719897bbd24fe9fcc41d90 Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Mon, 10 Dec 2012 12:38:48 +0000 Subject: [PATCH 38/77] Added update-able mv-ref probabilities. Part of NEW_MVREF experiment. Added update-able probabilities. 
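The update rule is the usual cost-driven one. Per tree node it looks like this (helper names as they appear in update_mv_ref_probs() in the diff; a summary, not a drop-in):

    /* Signal a new probability only when the bits it saves outweigh the
     * update flag plus 8-bit literal, approximated by
     * VP9_MV_REF_UPDATE_COST ((14 << 8), in the 256-scaled cost units
     * used by cost_branch256()). */
    static vp9_prob choose_mv_ref_prob(unsigned int branch_cnts[2],
                                       unsigned int count,
                                       vp9_prob old_prob) {
      const int old_cost = cost_branch256(branch_cnts, old_prob);
      const vp9_prob new_prob = get_prob(branch_cnts[0], count);
      const int new_cost = cost_branch256(branch_cnts, new_prob);

      return (new_cost + VP9_MV_REF_UPDATE_COST <= old_cost) ? new_prob
                                                             : old_prob;
    }

On the decoder side each update is read back as a flag coded with VP9_MVREF_UPDATE_PROB followed, when set, by the 8-bit literal.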
Change-Id: I5a4fcf4aaed1d0d1dac980f69d535639a3d59401 --- vp9/common/vp9_blockd.h | 2 +- vp9/common/vp9_entropymv.h | 7 +++ vp9/decoder/vp9_decodemv.c | 9 +--- vp9/decoder/vp9_decodframe.c | 27 ++++++++++ vp9/encoder/vp9_bitstream.c | 92 ++++++++++++++++++++++++++++++++--- vp9/encoder/vp9_encodeframe.c | 16 +++--- vp9/encoder/vp9_onyx_if.c | 18 ------- vp9/encoder/vp9_onyx_int.h | 2 +- vp9/encoder/vp9_ratectrl.c | 10 ++++ vp9/encoder/vp9_rdopt.c | 12 ++--- 10 files changed, 144 insertions(+), 51 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index ad5f3b36c..c430ea2ae 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -362,7 +362,7 @@ typedef struct macroblockd { vp9_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; #if CONFIG_NEW_MVREF - vp9_prob mb_mv_ref_id_probs[MAX_REF_FRAMES][3]; + vp9_prob mb_mv_ref_probs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES-1]; #endif // Segment features diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h index dcdd0ec53..f5cfee937 100644 --- a/vp9/common/vp9_entropymv.h +++ b/vp9/common/vp9_entropymv.h @@ -25,6 +25,13 @@ void vp9_adapt_nmv_probs(struct VP9Common *cm, int usehp); int vp9_use_nmv_hp(const MV *ref); #define VP9_NMV_UPDATE_PROB 255 + +#if CONFIG_NEW_MVREF +#define VP9_MVREF_UPDATE_PROB 252 +#define VP9_DEFAULT_MV_REF_PROB 192 +#define VP9_MV_REF_UPDATE_COST (14 << 8) +#endif + //#define MV_GROUP_UPDATE #define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */ diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index f36a22409..f84611734 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -591,11 +591,6 @@ static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) { } #endif -#if CONFIG_NEW_MVREF - // Temp defaults probabilities for ecnoding the MV ref id signal - vpx_memset(xd->mb_mv_ref_id_probs, 192, sizeof(xd->mb_mv_ref_id_probs)); -#endif - read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv); } } @@ -936,7 +931,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // Encode the index of the choice. best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]); + vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]); best_mv.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; @@ -945,7 +940,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, // Encode the index of the choice. best_index = - vp9_read_mv_ref_id(bc, xd->mb_mv_ref_id_probs[ref_frame]); + vp9_read_mv_ref_id(bc, xd->mb_mv_ref_probs[ref_frame]); best_mv_second.as_int = mbmi->ref_mvs[ref_frame][best_index].as_int; } } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index c3a17eff0..af345824e 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -1608,6 +1608,33 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { } } +#if CONFIG_NEW_MVREF + // If Key frame reset mv ref id probabilities to defaults + if (pc->frame_type == KEY_FRAME) { + // Defaults probabilities for encoding the MV ref id signal + vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB, + sizeof(xd->mb_mv_ref_probs)); + } else { + // Read any mv_ref index probability updates + int i, j; + + for (i = 0; i < MAX_REF_FRAMES; ++i) { + // Skip the dummy entry for intra ref frame. 
+ if (i == INTRA_FRAME) { + continue; + } + + // Read any updates to probabilities + for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) { + if (vp9_read(&header_bc, VP9_MVREF_UPDATE_PROB)) { + xd->mb_mv_ref_probs[i][j] = + (vp9_prob)vp9_read_literal(&header_bc, 8); + } + } + } + } +#endif + if (0) { FILE *z = fopen("decodestats.stt", "a"); fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 54d44faec..956c16c48 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -12,6 +12,7 @@ #include "vp9/common/vp9_header.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_findnearmv.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/common/vp9_systemdependent.h" @@ -259,6 +260,56 @@ static void update_mode_probs(VP9_COMMON *cm, } } } + +#if CONFIG_NEW_MVREF +static void update_mv_ref_probs(VP9_COMP *cpi, + int mvref_probs[MAX_REF_FRAMES] + [MAX_MV_REF_CANDIDATES-1]) { + MACROBLOCKD *xd = &cpi->mb.e_mbd; + int rf; // Reference frame + int ref_c; // Motion reference candidate + int node; // Probability node index + + for (rf = 0; rf < MAX_REF_FRAMES; ++rf) { + int count = 0; + + // Skip the dummy entry for intra ref frame. + if (rf == INTRA_FRAME) { + continue; + } + + // Sum the counts for all candidates + for (ref_c = 0; ref_c < MAX_MV_REF_CANDIDATES; ++ref_c) { + count += cpi->mb_mv_ref_count[rf][ref_c]; + } + + // Calculate the tree node probabilities + for (node = 0; node < MAX_MV_REF_CANDIDATES-1; ++node) { + int new_prob, old_cost, new_cost; + unsigned int branch_cnts[2]; + + // How many hits on each branch at this node + branch_cnts[0] = cpi->mb_mv_ref_count[rf][node]; + branch_cnts[1] = count - cpi->mb_mv_ref_count[rf][node]; + + // Work out cost of coding branches with the old and optimal probability + old_cost = cost_branch256(branch_cnts, xd->mb_mv_ref_probs[rf][node]); + new_prob = get_prob(branch_cnts[0], count); + new_cost = cost_branch256(branch_cnts, new_prob); + + // Take current 0 branch cases out of residual count + count -= cpi->mb_mv_ref_count[rf][node]; + + if ((new_cost + VP9_MV_REF_UPDATE_COST) <= old_cost) { + mvref_probs[rf][node] = new_prob; + } else { + mvref_probs[rf][node] = xd->mb_mv_ref_probs[rf][node]; + } + } + } +} +#endif + static void write_ymode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_ymode_tree, p, vp9_ymode_encodings + m); } @@ -912,17 +963,13 @@ static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { if (mode == NEWMV) { // Encode the index of the choice. vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_id_probs[rf], mi->best_index); - cpi->best_ref_index_counts[rf][mi->best_index]++; + xd->mb_mv_ref_probs[rf], mi->best_index); if (mi->second_ref_frame > 0) { // Encode the index of the choice. vp9_write_mv_ref_id( - bc, xd->mb_mv_ref_id_probs[mi->second_ref_frame], + bc, xd->mb_mv_ref_probs[mi->second_ref_frame], mi->best_second_index); - - cpi->best_ref_index_counts[mi->second_ref_frame] - [mi->best_second_index]++; } } #endif @@ -1964,7 +2011,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, // If appropriate update the inter mode probability context and code the // changes in the bitstream. 
- if ((pc->frame_type != KEY_FRAME)) { + if (pc->frame_type != KEY_FRAME) { int i, j; int new_context[INTER_MODE_CONTEXTS][4]; update_mode_probs(pc, new_context); @@ -1986,6 +2033,37 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } } +#if CONFIG_NEW_MVREF + if ((pc->frame_type != KEY_FRAME)) { + int new_mvref_probs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES-1]; + int i, j; + + update_mv_ref_probs(cpi, new_mvref_probs); + + for (i = 0; i < MAX_REF_FRAMES; ++i) { + // Skip the dummy entry for intra ref frame. + if (i == INTRA_FRAME) { + continue; + } + + // Encode any mandated updates to probabilities + for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) { + if (new_mvref_probs[i][j] != xd->mb_mv_ref_probs[i][j]) { + vp9_write(&header_bc, 1, VP9_MVREF_UPDATE_PROB); + vp9_write_literal(&header_bc, new_mvref_probs[i][j], 8); + + // Only update the persistent copy if this is the "real pack" + if (!cpi->dummy_packing) { + xd->mb_mv_ref_probs[i][j] = new_mvref_probs[i][j]; + } + } else { + vp9_write(&header_bc, 0, VP9_MVREF_UPDATE_PROB); + } + } + } + } +#endif + vp9_clear_system_state(); // __asm emms; vp9_copy(cpi->common.fc.pre_coef_probs_4x4, diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 63fc1a949..b7a5b8065 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -392,7 +392,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; int max_mv = MV_MAX; - cost = vp9_cost_mv_ref_id(xd->mb_mv_ref_id_probs[ref_frame], 0) + + cost = vp9_cost_mv_ref_id(xd->mb_mv_ref_probs[ref_frame], 0) + vp9_mv_bit_cost(&target_mv, &mv_ref_list[0], x->nmvjointcost, x->mvcost, 96, xd->allow_high_precision_mv); @@ -413,7 +413,7 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, continue; } - cost2 = vp9_cost_mv_ref_id(xd->mb_mv_ref_id_probs[ref_frame], i) + + cost2 = vp9_cost_mv_ref_id(xd->mb_mv_ref_probs[ref_frame], i) + vp9_mv_bit_cost(&target_mv, &mv_ref_list[i], x->nmvjointcost, x->mvcost, 96, xd->allow_high_precision_mv); @@ -422,8 +422,6 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, best_index = i; } } - - // best_index = x->mv_best_ref_index[ref_frame]; best_ref->as_int = mv_ref_list[best_index].as_int; return best_index; @@ -555,6 +553,7 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, best_index = pick_best_mv_ref(x, rf, mbmi->mv[0], mbmi->ref_mvs[rf], &best_mv); mbmi->best_index = best_index; + ++cpi->mb_mv_ref_count[rf][best_index]; if (mbmi->second_ref_frame > 0) { unsigned int best_index; @@ -563,6 +562,7 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_mvs[sec_ref_frame], &best_second_mv); mbmi->best_second_index = best_index; + ++cpi->mb_mv_ref_count[sec_ref_frame][best_index]; } #endif } @@ -1443,11 +1443,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { // this frame which may be updated with each iteration of the recode loop. 
vp9_compute_mod_refprobs(cm); -#if CONFIG_NEW_MVREF - // temp stats reset - vp9_zero( cpi->best_ref_index_counts ); -#endif - // debug output #if DBG_PRNT_SEGMAP { @@ -1496,6 +1491,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS vp9_zero(cpi->coef_counts_32x32); #endif +#if CONFIG_NEW_MVREF + vp9_zero(cpi->mb_mv_ref_count); +#endif vp9_frame_init_quantizer(cpi); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 9e4367310..5043bc49a 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2992,11 +2992,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set default state for segment based loop filter update flags xd->mode_ref_lf_delta_update = 0; -#if CONFIG_NEW_MVREF - // Temp defaults probabilities for ecnoding the MV ref id signal - vpx_memset(xd->mb_mv_ref_id_probs, 192, - sizeof(xd->mb_mv_ref_id_probs)); -#endif // Set various flags etc to special state if it is a key frame if (cm->frame_type == KEY_FRAME) { @@ -3789,19 +3784,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // in this frame. update_base_skip_probs(cpi); -#if 0 //CONFIG_NEW_MVREF && CONFIG_INTERNAL_STATS - { - FILE *f = fopen("mv_ref_dist.stt", "a"); - unsigned int i; - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - fprintf(f, "%10d", cpi->best_ref_index_counts[0][i]); - } - fprintf(f, "\n" ); - - fclose(f); - } -#endif - #if 0// 1 && CONFIG_INTERNAL_STATS { FILE *f = fopen("tmp.stt", "a"); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 0c2b8ccdb..6452be3c1 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -800,7 +800,7 @@ typedef struct VP9_COMP { unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS]; #if CONFIG_NEW_MVREF - unsigned int best_ref_index_counts[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + unsigned int mb_mv_ref_count[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; #endif } VP9_COMP; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 540a68094..08ad54b51 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -277,6 +277,16 @@ void vp9_setup_key_frame(VP9_COMP *cpi) { vp9_update_mode_info_border(cm, cm->mip); vp9_update_mode_info_in_image(cm, cm->mi); + +#if CONFIG_NEW_MVREF + if (1) { + MACROBLOCKD *xd = &cpi->mb.e_mbd; + + // Defaults probabilities for encoding the MV ref id signal + vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB, + sizeof(xd->mb_mv_ref_probs)); + } +#endif } void vp9_setup_inter_frame(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 69bc892bb..0d4c2b652 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3256,11 +3256,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, mv_pred(cpi, x, y_buffer[frame_type], yv12->y_stride, frame_type, block_size); -#if CONFIG_NEW_MVREF - // TODO(paulwilkins): Final choice of which of the best 4 candidates from - // above gives lowest error score when used in isolation. 
This stage encoder - // and sets the reference MV -#endif } static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -3300,8 +3295,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, case NEWMV: ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; - // ref_mv[0] = mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]]; - // ref_mv[1] = mbmi->ref_mvs[refs[1]][x->mv_best_ref_index[refs[1]]]; + if (is_comp_pred) { if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) @@ -3328,8 +3322,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_clamp_mv_min_max(x, &ref_mv[0]); + // mvp_full.as_int = ref_mv[0].as_int; mvp_full.as_int = - mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int; + mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int; + mvp_full.as_mv.col >>= 3; mvp_full.as_mv.row >>= 3; if (mvp_full.as_int != mvp_full.as_int) { From cad4a914290e3920c0eabedaaf78172648c47d0e Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Wed, 2 Jan 2013 18:06:00 +0000 Subject: [PATCH 39/77] Change INT64_MAX to LLONG_MAX This is needed to make the windows build work after the removal of vp9_type_alisases.h. Change-Id: I8addf38e9f3c8b864e0e30a8916a26e0264dd02c --- vp9/encoder/vp9_rdopt.c | 80 ++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0d4c2b652..cba9094f9 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1159,7 +1159,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, #if CONFIG_COMP_INTRA_PRED B_PREDICTION_MODE mode2; #endif - int64_t best_rd = INT64_MAX; + int64_t best_rd = LLONG_MAX; int rate = 0; int distortion; @@ -1362,7 +1362,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat } if (total_rd >= best_rd) - return INT64_MAX; + return LLONG_MAX; #if CONFIG_COMP_INTRA_PRED cost += vp9_cost_bit(128, allow_comp); @@ -1386,7 +1386,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int this_rate, this_rate_tokenonly; int this_distortion, s; - int64_t best_rd = INT64_MAX, this_rd; + int64_t best_rd = LLONG_MAX, this_rd; /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -1434,12 +1434,12 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int rate, ratey; int distortion, skip; - int64_t best_rd = INT64_MAX; + int64_t best_rd = LLONG_MAX; int64_t this_rd; int i; for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; + txfm_cache[i] = LLONG_MAX; // Y Search for 16x16 intra prediction mode for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -1519,7 +1519,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, MB_PREDICTION_MODE mode2; #endif MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; + int64_t best_rd = LLONG_MAX; int distortion = 0, rate = 0; BLOCK *be = x->block + ib; BLOCKD *b = xd->block + ib; @@ -1912,7 +1912,7 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, #endif MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = INT64_MAX; + int64_t best_rd = LLONG_MAX; int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); int rate_to, UNINITIALIZED_IS_SAFE(skip); @@ -1984,7 +1984,7 @@ static void 
rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = INT64_MAX; + int64_t best_rd = LLONG_MAX; int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); int rate_to, UNINITIALIZED_IS_SAFE(skip); @@ -2099,7 +2099,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, int *skippable) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = INT64_MAX, this_rd; + int64_t best_rd = LLONG_MAX, this_rd; int this_rate_tokenonly, this_rate; int this_distortion, s; @@ -2495,7 +2495,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = tx_size; for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; - int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; + int64_t best_label_rd = LLONG_MAX, best_other_rd = LLONG_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; @@ -2833,7 +2833,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, vpx_memset(&bsi, 0, sizeof(bsi)); for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; + txfm_cache[i] = LLONG_MAX; bsi.segment_rd = best_rd; bsi.ref_mv = best_ref_mv; @@ -3299,7 +3299,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (is_comp_pred) { if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) - return INT64_MAX; + return LLONG_MAX; *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]], &ref_mv[0], x->nmvjointcost, x->mvcost, 96, @@ -3373,7 +3373,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // near) is 0,0 as this should then be coded using the zeromv mode. 
for (i = 0; i < num_refs; ++i) if (frame_mv[this_mode][refs[i]].as_int == 0) - return INT64_MAX; + return LLONG_MAX; case ZEROMV: default: break; @@ -3383,7 +3383,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Clip "next_nearest" so that it does not extend to far out of image clamp_mv2(&cur_mv[i], xd); if (mv_check_bounds(x, &cur_mv[i])) - return INT64_MAX; + return LLONG_MAX; mbmi->mv[i].as_int = cur_mv[i].as_int; } @@ -3569,20 +3569,20 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_txfm_diff[NB_TXFM_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; int64_t best_pred_rd[NB_PREDICTION_TYPES]; - int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX; + int64_t best_rd = LLONG_MAX, best_intra_rd = LLONG_MAX; #if CONFIG_COMP_INTERINTRA_PRED int is_best_interintra = 0; - int64_t best_intra16_rd = INT64_MAX; + int64_t best_intra16_rd = LLONG_MAX; int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; #endif - int64_t best_overall_rd = INT64_MAX; + int64_t best_overall_rd = LLONG_MAX; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int uv_intra_skippable = 0; int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0; int uv_intra_skippable_8x8 = 0; int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); int distortion_uv = INT_MAX; - int64_t best_yrd = INT64_MAX; + int64_t best_yrd = LLONG_MAX; #if CONFIG_PRED_FILTER int best_filter_state = 0; #endif @@ -3610,9 +3610,9 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_REF_FRAMES; i++) frame_mv[NEWMV][i].as_int = INVALID_MV; for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = INT64_MAX; + best_pred_rd[i] = LLONG_MAX; for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = INT64_MAX; + best_txfm_rd[i] = LLONG_MAX; for (i = 0; i < NB_PARTITIONINGS; i++) { int j, k; @@ -3643,7 +3643,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mdcounts, y_buffer, u_buffer, v_buffer); } - *returnintra = INT64_MAX; + *returnintra = LLONG_MAX; x->skip = 0; @@ -3673,7 +3673,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { - int64_t this_rd = INT64_MAX; + int64_t this_rd = LLONG_MAX; int disable_skip = 0, skippable = 0; int other_cost = 0; int compmode_cost = 0; @@ -3844,7 +3844,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += uv_intra_distortion; distortion_uv = uv_intra_distortion; } else { - this_rd = INT64_MAX; + this_rd = LLONG_MAX; disable_skip = 1; } } @@ -3938,7 +3938,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += uv_intra_distortion; distortion_uv = uv_intra_distortion; } else { - this_rd = INT64_MAX; + this_rd = LLONG_MAX; disable_skip = 1; } } @@ -3983,7 +3983,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_uv; skippable = skippable && uv_skippable; } else { - this_rd = INT64_MAX; + this_rd = LLONG_MAX; disable_skip = 1; } @@ -4019,7 +4019,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, recon_yoffset, mode_index, frame_mv); - if (this_rd == INT64_MAX) + if (this_rd == LLONG_MAX) continue; } @@ -4215,7 +4215,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (!mode_excluded && this_rd != INT64_MAX) { + if (!mode_excluded && this_rd != LLONG_MAX) { 
for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; if (this_mode != B_PRED) { @@ -4314,7 +4314,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - if (best_pred_rd[i] == INT64_MAX) + if (best_pred_rd[i] == LLONG_MAX) best_pred_diff[i] = INT_MIN; else best_pred_diff[i] = best_rd - best_pred_rd[i]; @@ -4322,7 +4322,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == INT64_MAX) + if (best_txfm_rd[i] == LLONG_MAX) best_txfm_diff[i] = INT_MIN; else best_txfm_diff[i] = best_rd - best_txfm_rd[i]; @@ -4550,8 +4550,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mdcounts[4]; int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; - int64_t best_rd = INT64_MAX; - int64_t best_yrd = INT64_MAX; + int64_t best_rd = LLONG_MAX; + int64_t best_yrd = LLONG_MAX; int64_t best_txfm_rd[NB_TXFM_MODES]; int64_t best_txfm_diff[NB_TXFM_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; @@ -4561,10 +4561,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, unsigned int ref_costs[MAX_REF_FRAMES]; #if CONFIG_COMP_INTERINTRA_PRED int is_best_interintra = 0; - int64_t best_intra16_rd = INT64_MAX; + int64_t best_intra16_rd = LLONG_MAX; int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; #endif - int64_t best_overall_rd = INT64_MAX; + int64_t best_overall_rd = LLONG_MAX; int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0, rate_uv_tokenonly_8x8 = 0; int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0; @@ -4582,9 +4582,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = INT64_MAX; + best_pred_rd[i] = LLONG_MAX; for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = INT64_MAX; + best_txfm_rd[i] = LLONG_MAX; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { @@ -4622,7 +4622,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { int mode_excluded = 0; - int64_t this_rd = INT64_MAX; + int64_t this_rd = LLONG_MAX; int disable_skip = 0; int other_cost = 0; int compmode_cost = 0; @@ -4784,7 +4784,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, recon_yoffset, mode_index, frame_mv); - if (this_rd == INT64_MAX) + if (this_rd == LLONG_MAX) continue; } @@ -4944,7 +4944,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (!mode_excluded && this_rd != INT64_MAX) { + if (!mode_excluded && this_rd != LLONG_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; if (this_mode != B_PRED) { @@ -5009,7 +5009,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - if (best_pred_rd[i] == INT64_MAX) + if (best_pred_rd[i] == LLONG_MAX) best_pred_diff[i] = INT_MIN; else best_pred_diff[i] = best_rd - best_pred_rd[i]; @@ -5017,7 +5017,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == INT64_MAX) + if (best_txfm_rd[i] == LLONG_MAX) best_txfm_diff[i] = 
INT_MIN; else best_txfm_diff[i] = best_rd - best_txfm_rd[i]; From bd28510ef97883bcdf0d76419cb19f86940355e2 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 3 Jan 2013 08:00:00 -0800 Subject: [PATCH 40/77] Merge cost_coeffs_2x2() into cost_coeffs() Remove special case function cost_coeffs_2x2() and change function cost_coeffs() to handle the 2nd order haar block as it already handles all other block types. Change-Id: I2aac6f81ee0ae9e03d6a8da4f8681d69b79ce41f --- vp9/encoder/vp9_rdopt.c | 53 +++++++---------------------------------- 1 file changed, 8 insertions(+), 45 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index cba9094f9..bc8e35a8a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -518,49 +518,6 @@ int vp9_uvsse(MACROBLOCK *x) { #else #define PT pt #endif - -static int cost_coeffs_2x2(MACROBLOCK *mb, - BLOCKD *b, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { - int nodc = (type == PLANE_TYPE_Y_NO_DC); - int c = nodc; /* start at coef 0, unless Y with Y2 */ - int eob = b->eob; - int pt; /* surrounding block/prev coef predictor */ - int cost = 0; - int16_t *qcoeff_ptr = b->qcoeff; -#if CONFIG_NEWCOEFCONTEXT - const int *neighbors = vp9_default_zig_zag1d_4x4_neighbors; - int pn; -#endif - - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); - assert(eob <= 4); - -#if CONFIG_NEWCOEFCONTEXT - pn = pt; -#endif - for (; c < eob; c++) { - int v = qcoeff_ptr[vp9_default_zig_zag1d_4x4[c]]; - int t = vp9_dct_value_tokens_ptr[v].Token; - cost += mb->token_costs[TX_8X8][type][vp9_coef_bands_4x4[c]][pt][t]; - cost += vp9_dct_value_cost_ptr[v]; - pt = vp9_prev_token_class[t]; -#if CONFIG_NEWCOEFCONTEXT - if (c < 4 - 1) - pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, nodc, neighbors, vp9_default_zig_zag1d_4x4[c + 1]); -#endif - } - - if (c < 4) - cost += mb->token_costs[TX_8X8][type][vp9_coef_bands_4x4[c]] - [PT][DCT_EOB_TOKEN]; - // is eob first coefficient; - pt = (c > !type); - *a = *l = pt; - return cost; -} - static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, TX_SIZE tx_size) { @@ -617,6 +574,11 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, bb = xd->block + ib; tx_type = get_tx_type_8x8(xd, bb); } + } else if (type == PLANE_TYPE_Y2) { + scan = vp9_default_zig_zag1d_4x4; + band = vp9_coef_bands_4x4; + default_eob = 4; + tx_type = DCT_DCT; } break; case TX_16X16: @@ -788,9 +750,10 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) { TX_8X8); if (has_2nd_order) - cost += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2, + cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2, ta + vp9_block2above[TX_8X8][24], - tl + vp9_block2left[TX_8X8][24]); + tl + vp9_block2left[TX_8X8][24], + TX_8X8); return cost; } From 259b800832bd8555d60895ee68ad108cd515ecb9 Mon Sep 17 00:00:00 2001 From: Adrian Grange Date: Thu, 20 Dec 2012 14:56:19 -0800 Subject: [PATCH 41/77] New interpolation filter selection algorithm Old Scheme: When SWITCHABLE filter selection is enabled the encoder evaluates the use of each interpolation filter type and selects the best one to use at the MB level. A frame-level flag can be set to force the use of a particular filter type for all MBs in a frame if it is more efficient to encode that way. The logic here involved a Q-dependent threshold that assumed that the second 8-tap filter was a high-pass filter. However, this requires a trip around the recode loop.
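For reference, that deleted frame-level test amounts to the following self-contained sketch (simplified from the hunk removed in vp9_onyx_if.c further down; the driver, counts and names here are illustrative only):

    #include <stdio.h>

    #define NUM_FILTERS 3  /* stands in for VP9_SWITCHABLE_FILTERS */

    /* Returns the index of the one filter that cleared the Q-dependent
     * threshold, or -1 if zero or several filters cleared it. */
    static int dominant_filter(const int count[NUM_FILTERS], int q) {
      int interp_factor = q / 3;  /* denominator is 256 */
      int tot = 0, thr, hits = 0, idx = -1, i;
      for (i = 0; i < NUM_FILTERS; ++i)
        tot += count[i];
      thr = (tot * interp_factor + 128) >> 8;
      for (i = 0; i < NUM_FILTERS; ++i) {
        if (count[i] >= thr) {
          ++hits;
          idx = i;
        }
      }
      return (hits == 1) ? idx : -1;
    }

    int main(void) {
      int count[NUM_FILTERS] = { 900, 60, 40 };  /* hypothetical tallies */
      int idx = dominant_filter(count, 90);      /* q = 90 */
      if (idx >= 0)
        printf("force filter %d at the frame level and re-encode\n", idx);
      else
        printf("keep MB-level (SWITCHABLE) signalling\n");
      return 0;
    }

When exactly one filter clears the threshold, the old code forces it at the frame level and sets Loop = TRUE, which is the extra recode trip noted above.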
If the frame-level flag indicates use of a particular filter, the other filters are not evaluated in the pick_mode loop. New Scheme: Each filter type is evaluated at the MB level and a record of the best filter is kept, irrespective of what filter is signaled at the frame-level. Once all MBs have been encoded, a decision is made as to what frame-level mode to set for the *next* frame. If one filter is used by 80% or more of the MBs, then this filter is forced since it is assumed that this will be more efficient if the next frame has similar characteristics. i.e. there is a one-frame lag between measuring the filter selection and setting the frame-level mode to use. Change-Id: I6a7e7ced8f27e120fafb99db2dc9c6293f8d20f7 --- vp9/encoder/vp9_encodeframe.c | 7 + vp9/encoder/vp9_onyx_if.c | 73 +++++---- vp9/encoder/vp9_onyx_int.h | 2 + vp9/encoder/vp9_rdopt.c | 294 ++++++++++++++++++++-------------- 4 files changed, 221 insertions(+), 155 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index b7a5b8065..3219e12da 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -45,6 +45,8 @@ int enc_debug = 0; #endif +extern void select_interp_filter_type(VP9_COMP *cpi); + static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset, int output_enabled, @@ -1477,6 +1479,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->pred_filter_off_count = 0; #endif vp9_zero(cpi->switchable_interp_count); + vp9_zero(cpi->best_switchable_interp_count); xd->mode_info_context = cm->mi; xd->prev_mode_info_context = cm->prev_mi; @@ -1828,6 +1831,10 @@ void vp9_encode_frame(VP9_COMP *cpi) { #endif } } + + // Update interpolation filter strategy for next frame. + if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter)) + select_interp_filter_type(cpi); } else { encode_frame_internal(cpi); } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 5043bc49a..9b186c2c4 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2841,6 +2841,45 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { } +void select_interp_filter_type(VP9_COMP *cpi) { + int i; + int high_filter_index; + unsigned int thresh; + unsigned int high_count = 0; + unsigned int count_sum = 0; + unsigned int *hist = cpi->best_switchable_interp_count; + + if (DEFAULT_INTERP_FILTER != SWITCHABLE) { + cpi->common.mcomp_filter_type = DEFAULT_INTERP_FILTER; + return; + } + + // TODO(agrange): Look at using RD criteria to select the interpolation + // filter to use for the next frame rather than this simpler counting scheme. + + // Select the interpolation filter mode for the next frame + // based on the selection frequency seen in the current frame. + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + unsigned int count = hist[i]; + count_sum += count; + if (count > high_count) { + high_count = count; + high_filter_index = i; + } + } + + thresh = (unsigned int)(0.80 * count_sum); + + if (high_count > thresh) { + // One filter accounts for 80+% of cases so force the next + // frame to use this filter exclusively using frame-level flag. + cpi->common.mcomp_filter_type = vp9_switchable_interp[high_filter_index]; + } else { + // Use a MB-level switchable filter selection strategy. 
+ cpi->common.mcomp_filter_type = SWITCHABLE; + } +} + #if CONFIG_PRED_FILTER void select_pred_filter_mode(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; @@ -3131,7 +3170,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cpi->active_worst_quality < cpi->active_best_quality) cpi->active_worst_quality = cpi->active_best_quality; - // Specuial case code to try and match quality with forced key frames + // Special case code to try and match quality with forced key frames if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { Q = cpi->last_boosted_qindex; } else { @@ -3485,37 +3524,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cpi->is_src_frame_alt_ref) Loop = FALSE; - if (cm->frame_type != KEY_FRAME && - !sf->search_best_filter && - cm->mcomp_filter_type == SWITCHABLE) { - int interp_factor = Q / 3; /* denominator is 256 */ - int count[VP9_SWITCHABLE_FILTERS]; - int tot_count = 0, c = 0, thr; - int i, j; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - count[i] = 0; - for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) { - count[i] += cpi->switchable_interp_count[j][i]; - } - tot_count += count[i]; - } - - thr = ((tot_count * interp_factor + 128) >> 8); - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - c += (count[i] >= thr); - } - if (c == 1) { - /* Mostly one filter is used. So set the filter at frame level */ - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - if (count[i]) { - cm->mcomp_filter_type = vp9_switchable_interp[i]; - Loop = TRUE; /* Make sure to loop since the filter changed */ - break; - } - } - } - } - if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) { if (mcomp_filter_index < mcomp_filters) { int64_t err = vp9_calc_ss_err(cpi->Source, @@ -3570,6 +3578,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (Loop == TRUE) { loop_count++; + #if CONFIG_INTERNAL_STATS cpi->tot_recode_hits++; #endif diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 6452be3c1..7c9181ba7 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -799,6 +799,8 @@ typedef struct VP9_COMP { #endif unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS]; + unsigned int best_switchable_interp_count[VP9_SWITCHABLE_FILTERS]; + #if CONFIG_NEW_MVREF unsigned int mb_mv_ref_count[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index cba9094f9..d6d3fcaaa 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3498,14 +3498,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - if (is_comp_pred) { - *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); - } else { - *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); - } + if (!(*mode_excluded)) { + if (is_comp_pred) { + *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); + } else { + *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); + } #if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1; + if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1; #endif + } if (!x->skip) { if (block_size == BLOCK_16X16) { @@ -3556,6 +3558,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, PARTITION_INFO best_partition; int_mv best_ref_mv, second_best_ref_mv; MB_PREDICTION_MODE this_mode; + MB_PREDICTION_MODE best_mode = DC_PRED; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; 
int i, best_mode_index = 0; int mode8x8[2][4]; @@ -3576,6 +3579,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; #endif int64_t best_overall_rd = LLONG_MAX; + INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int uv_intra_skippable = 0; int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0; @@ -3697,16 +3701,20 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_PRED_FILTER mbmi->pred_filter_enabled = 0; #endif - if (cpi->common.mcomp_filter_type == SWITCHABLE && - this_mode >= NEARESTMV && this_mode <= SPLITMV) { + + // Evaluate all sub-pel filters irrespective of whether we can use + // them for this frame. + if (this_mode >= NEARESTMV && this_mode <= SPLITMV) { mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index++]; if (switchable_filter_index == VP9_SWITCHABLE_FILTERS) switchable_filter_index = 0; - } else { - mbmi->interp_filter = cpi->common.mcomp_filter_type; + if ((cm->mcomp_filter_type != SWITCHABLE) && + (cm->mcomp_filter_type != mbmi->interp_filter)) { + mode_excluded = 1; + } + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); } - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); // Test best rd so far against threshold for trying this mode. if (best_rd <= cpi->rd_threshes[mode_index]) @@ -3972,6 +3980,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] [vp9_switchable_interp_map[mbmi->interp_filter]]; + // If even the 'Y' rd value of split is higher than best so far // then dont bother looking at UV if (tmp_rd < best_yrd) { @@ -3987,10 +3996,12 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, disable_skip = 1; } - if (is_comp_pred) - mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; - else - mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; + if (!mode_excluded) { + if (is_comp_pred) + mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; + else + mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; + } compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); @@ -4103,13 +4114,14 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } #endif - if (!disable_skip && mbmi->ref_frame == INTRA_FRAME) for (i = 0; i < NB_PREDICTION_TYPES; ++i) best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); if (this_rd < best_overall_rd) { best_overall_rd = this_rd; + best_filter = mbmi->interp_filter; + best_mode = this_mode; #if CONFIG_PRED_FILTER best_filter_state = mbmi->pred_filter_enabled; #endif @@ -4125,117 +4137,121 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (cm->pred_filter_mode == mbmi->pred_filter_enabled)) { #endif - // Did this mode help.. i.e. is it the new best mode - if (this_rd < best_rd || x->skip) { - if (!mode_excluded) { - /* - if (mbmi->second_ref_frame == INTRA_FRAME) { - printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd); - } - */ - // Note index of best mode so far - best_mode_index = mode_index; + // Did this mode help.. i.e. 
is it the new best mode + if (this_rd < best_rd || x->skip) { + if (!mode_excluded) { + /* + if (mbmi->second_ref_frame == INTRA_FRAME) { + printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd); + } + */ + // Note index of best mode so far + best_mode_index = mode_index; - if (this_mode <= B_PRED) { - if (mbmi->txfm_size != TX_4X4 - && this_mode != B_PRED - && this_mode != I8X8_PRED) - mbmi->uv_mode = uv_intra_mode_8x8; - else - mbmi->uv_mode = uv_intra_mode; - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; - } - - other_cost += ref_costs[mbmi->ref_frame]; - - /* Calculate the final y RD estimate for this mode */ - best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost), - (distortion2 - distortion_uv)); - - *returnrate = rate2; - *returndistortion = distortion2; - best_rd = this_rd; - vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); - - if ((this_mode == B_PRED) - || (this_mode == I8X8_PRED) - || (this_mode == SPLITMV)) - for (i = 0; i < 16; i++) { - best_bmodes[i] = xd->block[i].bmi; - } + if (this_mode <= B_PRED) { + if (mbmi->txfm_size != TX_4X4 + && this_mode != B_PRED + && this_mode != I8X8_PRED) + mbmi->uv_mode = uv_intra_mode_8x8; + else + mbmi->uv_mode = uv_intra_mode; + /* required for left and above block mv */ + mbmi->mv[0].as_int = 0; } - // Testing this mode gave rise to an improvement in best error score. - // Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = - (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? - cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * - cpi->rd_thresh_mult[mode_index]; + other_cost += ref_costs[mbmi->ref_frame]; + + /* Calculate the final y RD estimate for this mode */ + best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost), + (distortion2 - distortion_uv)); + + *returnrate = rate2; + *returndistortion = distortion2; + best_rd = this_rd; + vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); + + if ((this_mode == B_PRED) + || (this_mode == I8X8_PRED) + || (this_mode == SPLITMV)) + for (i = 0; i < 16; i++) { + best_bmodes[i] = xd->block[i].bmi; + } } + + // Testing this mode gave rise to an improvement in best error score. + // Lower threshold a bit for next time + cpi->rd_thresh_mult[mode_index] = + (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? + cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; + cpi->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) * + cpi->rd_thresh_mult[mode_index]; + } else { // If the mode did not help improve the best error case then raise the // threshold for testing that mode next time around. 
- else { - cpi->rd_thresh_mult[mode_index] += 4; + cpi->rd_thresh_mult[mode_index] += 4; - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; + if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) + cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; + cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) + * cpi->rd_thresh_mult[mode_index]; + } + + /* keep record of best compound/single-only prediction */ + if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) { + int64_t single_rd, hybrid_rd; + int single_rate, hybrid_rate; + + if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + single_rate = rate2 - compmode_cost; + hybrid_rate = rate2; + } else { + single_rate = rate2; + hybrid_rate = rate2 + compmode_cost; } - /* keep record of best compound/single-only prediction */ - if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) { - int64_t single_rd, hybrid_rd; - int single_rate, hybrid_rate; + single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); + hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; + if (mbmi->second_ref_frame <= INTRA_FRAME && + single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { + best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; + } else if (mbmi->second_ref_frame > INTRA_FRAME && + single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { + best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; + } + if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) + best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; + } + + /* keep record of best txfm size */ + if (!mode_excluded && this_rd != LLONG_MAX) { + for (i = 0; i < NB_TXFM_MODES; i++) { + int64_t adj_rd; + if (this_mode != B_PRED) { + const int64_t txfm_mode_diff = + txfm_cache[i] - txfm_cache[cm->txfm_mode]; + adj_rd = this_rd + txfm_mode_diff; } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; + adj_rd = this_rd; } - - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - - if (mbmi->second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; - } else if (mbmi->second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; - } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; + if (adj_rd < best_txfm_rd[i]) + best_txfm_rd[i] = adj_rd; } + } - /* keep record of best txfm size */ - if (!mode_excluded && this_rd != LLONG_MAX) { - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd; - if (this_mode != B_PRED) { - const int64_t txfm_mode_diff = - txfm_cache[i] - txfm_cache[cm->txfm_mode]; - adj_rd = this_rd + txfm_mode_diff; - } else { - adj_rd = this_rd; - } - if (adj_rd < best_txfm_rd[i]) - best_txfm_rd[i] = adj_rd; - } - } + if (x->skip && !mode_excluded) + break; + } #if CONFIG_PRED_FILTER } #endif - if (x->skip && !mode_excluded) - break; - } + assert((cm->mcomp_filter_type == SWITCHABLE) || + (cm->mcomp_filter_type == best_mbmode.interp_filter) || + (best_mbmode.mode <= B_PRED)); #if CONFIG_PRED_FILTER // Update counts for prediction filter usage @@ -4248,6 +4264,11 @@ static void rd_pick_inter_mode(VP9_COMP 
*cpi, MACROBLOCK *x, ++cpi->interintra_select_count[is_best_interintra]; #endif + // Accumulate filter usage stats + // TODO(agrange): Use RD criteria to select interpolation filter mode. + if ((best_mode >= NEARESTMV) && (best_mode <= SPLITMV)) + ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]]; + // Reduce the activation RD thresholds for the best choice mode if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { @@ -4535,6 +4556,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; MB_PREDICTION_MODE this_mode; + MB_PREDICTION_MODE best_mode = DC_PRED; MV_REFERENCE_FRAME ref_frame; unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; int comp_pred, i; @@ -4565,6 +4587,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; #endif int64_t best_overall_rd = LLONG_MAX; + INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0, rate_uv_tokenonly_8x8 = 0; int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0; @@ -4659,16 +4682,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); #endif - if (cpi->common.mcomp_filter_type == SWITCHABLE && - this_mode >= NEARESTMV && this_mode <= SPLITMV) { + // Evaluate all sub-pel filters irrespective of whether we can use + // them for this frame. + if (this_mode >= NEARESTMV && this_mode <= SPLITMV) { mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index++]; if (switchable_filter_index == VP9_SWITCHABLE_FILTERS) switchable_filter_index = 0; - } else { - mbmi->interp_filter = cpi->common.mcomp_filter_type; + if ((cm->mcomp_filter_type != SWITCHABLE) && + (cm->mcomp_filter_type != mbmi->interp_filter)) { + mode_excluded = 1; + } + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); } - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); // if (!(cpi->ref_frame_flags & flag_list[ref_frame])) // continue; @@ -4693,15 +4719,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, xd->second_pre.y_buffer = y_buffer[second_ref]; xd->second_pre.u_buffer = u_buffer[second_ref]; xd->second_pre.v_buffer = v_buffer[second_ref]; - mode_excluded = cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; + mode_excluded = + mode_excluded ? + mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; } else { // mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; if (ref_frame != INTRA_FRAME) { if (mbmi->second_ref_frame != INTRA_FRAME) - mode_excluded = cm->comp_pred_mode == COMP_PREDICTION_ONLY; + mode_excluded = + mode_excluded ? + mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY; #if CONFIG_COMP_INTERINTRA_PRED else - mode_excluded = !cm->use_interintra; + mode_excluded = mode_excluded ? 
mode_excluded : !cm->use_interintra; #endif } } @@ -4872,6 +4902,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (this_rd < best_overall_rd) { best_overall_rd = this_rd; + best_filter = mbmi->interp_filter; + best_mode = this_mode; #if CONFIG_COMP_INTERINTRA_PRED is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME); #endif @@ -4900,20 +4932,27 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); } #if 0 - // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; + // Testing this mode gave rise to an improvement in best error score. + // Lower threshold a bit for next time + cpi->rd_thresh_mult[mode_index] = + (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? + cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; + cpi->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) + * cpi->rd_thresh_mult[mode_index]; #endif - } - // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around. - else { + } else { + // If the mode did not help improve the best error case then + // raise the threshold for testing that mode next time around. #if 0 cpi->rd_thresh_mult[mode_index] += 4; if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index]; + cpi->rd_threshes[mode_index] = + (cpi->rd_baseline_thresh[mode_index] >> 7) + * cpi->rd_thresh_mult[mode_index]; #endif } @@ -4961,11 +5000,20 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } + assert((cm->mcomp_filter_type == SWITCHABLE) || + (cm->mcomp_filter_type == best_mbmode.interp_filter) || + (best_mbmode.mode <= B_PRED)); + #if CONFIG_COMP_INTERINTRA_PRED ++cpi->interintra_select_count[is_best_interintra]; // if (is_best_interintra) printf("best_interintra\n"); #endif + // Accumulate filter usage stats + // TODO(agrange): Use RD criteria to select interpolation filter mode. 
+ if ((best_mode >= NEARESTMV) && (best_mode <= SPLITMV)) + ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]]; + // TODO(rbultje) integrate with RD thresholding #if 0 // Reduce the activation RD thresholds for the best choice mode From 83664f457b8ea7c907fdd3c4a8f9c80358c66212 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 3 Jan 2013 09:00:30 -0800 Subject: [PATCH 42/77] make cost_coeffs() and tokenize_b() consistent Change-Id: I7cdb5c32a1400f88ec36d08ea982e38b77731602 --- vp9/encoder/vp9_rdopt.c | 104 +++++++++++++++---------------------- vp9/encoder/vp9_tokenize.c | 6 +-- 2 files changed, 46 insertions(+), 64 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index bc8e35a8a..1ac9215b4 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -518,88 +518,71 @@ int vp9_uvsse(MACROBLOCK *x) { #else #define PT pt #endif -static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, +static int cost_coeffs(MACROBLOCK *mb, + BLOCKD *b, PLANE_TYPE type, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, TX_SIZE tx_size) { + int pt; const int eob = b->eob; - int nodc = (type == PLANE_TYPE_Y_NO_DC); - int c = nodc; /* start at coef 0, unless Y with Y2 */ - int cost = 0, default_eob, seg_eob; - int pt; /* surrounding block/prev coef predictor */ - int const *scan, *band; - int16_t *qcoeff_ptr = b->qcoeff; MACROBLOCKD *xd = &mb->e_mbd; - MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi; - TX_TYPE tx_type = DCT_DCT; - int segment_id = mbmi->segment_id; +#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS + const int ib = (int)(b - xd->block); +#endif + int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; + int cost = 0, seg_eob; + const int segment_id = xd->mode_info_context->mbmi.segment_id; + const int *scan, *band; + int16_t *qcoeff_ptr = b->qcoeff; + const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type(xd, b) : DCT_DCT; #if CONFIG_NEWCOEFCONTEXT const int *neighbors; int pn; #endif - scan = vp9_default_zig_zag1d_4x4; - band = vp9_coef_bands_4x4; - default_eob = 16; + + ENTROPY_CONTEXT a_ec = *a, l_ec = *l; switch (tx_size) { case TX_4X4: + scan = vp9_default_zig_zag1d_4x4; + band = vp9_coef_bands_4x4; + seg_eob = 16; if (type == PLANE_TYPE_Y_WITH_DC) { - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - switch (tx_type) { - case ADST_DCT: - scan = vp9_row_scan_4x4; - break; - - case DCT_ADST: - scan = vp9_col_scan_4x4; - break; - - default: - scan = vp9_default_zig_zag1d_4x4; - break; - } + if (tx_type == ADST_DCT) { + scan = vp9_row_scan_4x4; + } else if (tx_type == DCT_ADST) { + scan = vp9_col_scan_4x4; } } - break; case TX_8X8: - scan = vp9_default_zig_zag1d_8x8; - band = vp9_coef_bands_8x8; - default_eob = 64; - if (type == PLANE_TYPE_Y_WITH_DC) { - BLOCKD *bb; - int ib = (int)(b - xd->block); - if (ib < 16) { - ib = (ib & 8) + ((ib & 4) >> 1); - bb = xd->block + ib; - tx_type = get_tx_type_8x8(xd, bb); - } - } else if (type == PLANE_TYPE_Y2) { + if (type == PLANE_TYPE_Y2) { scan = vp9_default_zig_zag1d_4x4; band = vp9_coef_bands_4x4; - default_eob = 4; - tx_type = DCT_DCT; + seg_eob = 4; + } else { + scan = vp9_default_zig_zag1d_8x8; + band = vp9_coef_bands_8x8; + seg_eob = 64; } break; case TX_16X16: scan = vp9_default_zig_zag1d_16x16; band = vp9_coef_bands_16x16; - default_eob = 256; - if (type == PLANE_TYPE_Y_WITH_DC) { - tx_type = get_tx_type_16x16(xd, b); + seg_eob = 256; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - } else if (type == PLANE_TYPE_UV) { - int ib = (int)(b - xd->block) - 16; - - qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * ib; -#endif + if (type == PLANE_TYPE_UV) { + const int uv_idx = ib - 16; + qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx; } +#endif break; #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS case TX_32X32: scan = vp9_default_zig_zag1d_32x32; band = vp9_coef_bands_32x32; - default_eob = 1024; + seg_eob = 1024; qcoeff_ptr = xd->sb_coeff_data.qcoeff; break; #endif @@ -607,17 +590,16 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, abort(); break; } - if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB)) - seg_eob = vp9_get_segdata(&mb->e_mbd, segment_id, SEG_LVL_EOB); - else - seg_eob = default_eob; - - VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); + VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); #if CONFIG_NEWCOEFCONTEXT neighbors = vp9_get_coef_neighbors_handle(scan); pn = pt; #endif + + if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) + seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); + if (tx_type != DCT_DCT) { for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; @@ -628,7 +610,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, #if CONFIG_NEWCOEFCONTEXT if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, nodc, neighbors, scan[c + 1]); + qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); else pn = pt; #endif @@ -646,7 +628,7 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type, #if CONFIG_NEWCOEFCONTEXT if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1])) pn = vp9_get_coef_neighbor_context( - qcoeff_ptr, nodc, neighbors, scan[c + 1]); + qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]); else pn = pt; #endif diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 58bb251f5..7a364b3e8 100644 --- a/vp9/encoder/vp9_tokenize.c 
+++ b/vp9/encoder/vp9_tokenize.c @@ -120,9 +120,9 @@ static void tokenize_b(VP9_COMP *cpi, int pt; /* near block/prev token context index */ int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0; const BLOCKD * const b = xd->block + ib; - int eob = b->eob; /* one beyond last nonzero coeff */ + const int eob = b->eob; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ - const short *qcoeff_ptr = b->qcoeff; + int16_t *qcoeff_ptr = b->qcoeff; int seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; const int *bands, *scan; @@ -214,7 +214,7 @@ static void tokenize_b(VP9_COMP *cpi, } #if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 if (type == PLANE_TYPE_UV) { - int uv_idx = (((int) (b - xd->block)) - 16) >> 2; + int uv_idx = (ib - 16) >> 2; qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx; } #endif From c6ba3a3d8509779168e144ba273c14be5c063bc2 Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Thu, 3 Jan 2013 15:14:36 +0000 Subject: [PATCH 43/77] Further change to mv reference search. This experimental change reorders the search so that all possible references that match the target reference frame are tested first, in order of distance from the current block. These will usually be the highest scoring candidates. If we do not find enough good candidates this way we try the non-matching cases. These will usually be lower scoring candidates. The change in order, together with breakouts when we have found enough candidates, should reduce the computational cost and especially reduce the number of sort operations. Quality Results: Std Hd +0.228%, Hd +0.074%, YT +0.046%, derf +0.137% This effect is probably due to the fact that more distant weak candidates are now less likely to get "promoted" over near candidates even if they are repeated. Change-Id: Iec37e77d88a48ad0ee1f315b14327a95d63f81f6 --- vp9/common/vp9_mvref_common.c | 222 ++++++++++++++++------------------ 1 file changed, 104 insertions(+), 118 deletions(-) diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index bbed0554f..4b576e895 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -25,6 +25,7 @@ static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = { static int sb_ref_distance_weight[MVREF_NEIGHBOURS] = { 3, 3, 2, 2, 2, 1, 1, 1 }; #endif + // clamp_mv #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units static void clamp_mv(const MACROBLOCKD *xd, int_mv *mv) { @@ -40,10 +41,29 @@ static void clamp_mv(const MACROBLOCKD *xd, int_mv *mv) { mv->as_mv.row = xd->mb_to_bottom_edge + MV_BORDER; } +// Gets a candidate reference motion vector from the given mode info +// structure if one exists that matches the given reference frame. +static int get_matching_candidate( + const MODE_INFO *candidate_mi, + MV_REFERENCE_FRAME ref_frame, + int_mv *c_mv +) { + int ret_val = TRUE; -// Gets a best matching candidate refenence motion vector -// from the given mode info structure (if available) -static int get_candidate_mvref( + if (ref_frame == candidate_mi->mbmi.ref_frame) { + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + } else if (ref_frame == candidate_mi->mbmi.second_ref_frame) { + c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + } else { + ret_val = FALSE; + } + + return ret_val; +} + +// Gets candidate reference motion vector(s) from the given mode info +// structure if they exist and do NOT match the given reference frame.
+static void get_non_matching_candidates( const MODE_INFO *candidate_mi, MV_REFERENCE_FRAME ref_frame, MV_REFERENCE_FRAME *c_ref_frame, @@ -52,61 +72,29 @@ static int get_candidate_mvref( int_mv *c2_mv ) { - int ret_val = FALSE; + c_mv->as_int = 0; c2_mv->as_int = 0; + *c_ref_frame = INTRA_FRAME; *c2_ref_frame = INTRA_FRAME; - // Target ref frame matches candidate first ref frame - if (ref_frame == candidate_mi->mbmi.ref_frame) { - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - *c_ref_frame = ref_frame; - ret_val = TRUE; + // If first candidate not valid neither will be. + if (candidate_mi->mbmi.ref_frame > INTRA_FRAME) { + // First candidate + if (candidate_mi->mbmi.ref_frame != ref_frame) { + *c_ref_frame = candidate_mi->mbmi.ref_frame; + c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; + } - // Is there a second non zero vector we can use. + // Second candidate if ((candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) && - (candidate_mi->mbmi.mv[1].as_int != 0) && - (candidate_mi->mbmi.mv[1].as_int != c_mv->as_int)) { - c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; + (candidate_mi->mbmi.second_ref_frame != ref_frame)) { // && + // (candidate_mi->mbmi.mv[1].as_int != 0) && + // (candidate_mi->mbmi.mv[1].as_int != + // candidate_mi->mbmi.mv[0].as_int)) { *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; - } - - // Target ref frame matches candidate second ref frame - } else if (ref_frame == candidate_mi->mbmi.second_ref_frame) { - c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - *c_ref_frame = ref_frame; - ret_val = TRUE; - - // Is there a second non zero vector we can use. - if ((candidate_mi->mbmi.ref_frame > INTRA_FRAME) && - (candidate_mi->mbmi.mv[0].as_int != 0) && - (candidate_mi->mbmi.mv[0].as_int != c_mv->as_int)) { - c2_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - *c2_ref_frame = candidate_mi->mbmi.ref_frame; - } - - // No ref frame matches so use first ref mv as first choice - } else if (candidate_mi->mbmi.ref_frame > INTRA_FRAME) { - c_mv->as_int = candidate_mi->mbmi.mv[0].as_int; - *c_ref_frame = candidate_mi->mbmi.ref_frame; - ret_val = TRUE; - - // Is there a second non zero vector we can use. - if ((candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) && - (candidate_mi->mbmi.mv[1].as_int != 0) && - (candidate_mi->mbmi.mv[1].as_int != c_mv->as_int)) { c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - *c2_ref_frame = candidate_mi->mbmi.second_ref_frame; } - - // If only the second ref mv is valid:- (Should not trigger in current code - // base given current possible compound prediction options). - } else if (candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) { - c_mv->as_int = candidate_mi->mbmi.mv[1].as_int; - *c_ref_frame = candidate_mi->mbmi.second_ref_frame; - ret_val = TRUE; } - - return ret_val; } // Performs mv adjustment based on reference frame and clamps the MV @@ -240,8 +228,6 @@ void vp9_find_mv_refs( int candidate_scores[MAX_MV_REF_CANDIDATES]; int index = 0; int split_count = 0; - int ref_weight = 0; - int valid_mv_ref; int (*mv_ref_search)[2]; int *ref_distance_weight; @@ -262,8 +248,9 @@ void vp9_find_mv_refs( mv_ref_search = mb_mv_ref_search; ref_distance_weight = mb_ref_distance_weight; #endif - // Populate a list with candidate reference vectors from the - // spatial neighbours. 
+ + // We first scan for candidate vectors that match the current reference frame + // Look at nearest neigbours for (i = 0; i < 2; ++i) { if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { @@ -271,89 +258,88 @@ void vp9_find_mv_refs( candidate_mi = here + mv_ref_search[i][0] + (mv_ref_search[i][1] * xd->mode_info_stride); - valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - // If there is a valid MV candidate then add it to the list - if (valid_mv_ref) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); - ref_weight = ref_distance_weight[i] + - ((c_ref_frame == ref_frame) << 4); - split_count += (candidate_mi->mbmi.mode == SPLITMV); - + if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) { + clamp_mv(xd, &c_refmv); addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c_refmv, ref_weight); - - // If there is a second valid mv then add it as well. - if (c2_ref_frame > INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); - ref_weight = ref_distance_weight[i] + - ((c2_ref_frame == ref_frame) << 4); - - addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c2_refmv, ref_weight); - } + &index, c_refmv, ref_distance_weight[i] + 16); } + split_count += (candidate_mi->mbmi.mode == SPLITMV); } } - - // Look at the corresponding vector in the last frame + // Look in the last frame candidate_mi = lf_here; - valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - // If there is a valid MV candidate then add it to the list - if (valid_mv_ref) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); - ref_weight = 2 + ((c_ref_frame == ref_frame) << 4); + if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) { + clamp_mv(xd, &c_refmv); addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c_refmv, ref_weight); - - // If there is a second valid mv then add it as well. - if (c2_ref_frame > INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); - ref_weight = ref_distance_weight[i] + - ((c2_ref_frame == ref_frame) << 4); - - addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c2_refmv, ref_weight); - } + &index, c_refmv, 18); } - - // Populate a list with candidate reference vectors from the - // spatial neighbours. - for (i = 2; (i < MVREF_NEIGHBOURS) && (index < (MAX_MV_REFS - 2)); ++i) { + // More distant neigbours + for (i = 2; (i < MVREF_NEIGHBOURS) && + (index < (MAX_MV_REF_CANDIDATES - 1)); ++i) { if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { - candidate_mi = here + mv_ref_search[i][0] + (mv_ref_search[i][1] * xd->mode_info_stride); - valid_mv_ref = get_candidate_mvref(candidate_mi, ref_frame, - &c_ref_frame, &c_refmv, - &c2_ref_frame, &c2_refmv); - - // If there is a valid MV candidate then add it to the list - if (valid_mv_ref) { - scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias ); - ref_weight = ref_distance_weight[i] + - ((c_ref_frame == ref_frame) << 4); - + if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) { + clamp_mv(xd, &c_refmv); addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c_refmv, ref_weight); + &index, c_refmv, ref_distance_weight[i] + 16); + } + } + } - // If there is a second valid mv then add it as well. 
- if (c2_ref_frame > INTRA_FRAME) { - scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias ); - ref_weight = ref_distance_weight[i] + - ((c2_ref_frame == ref_frame) << 4); + // If we have not found enough candidates consider ones where the + // reference frame does not match. Break out when we have + // MAX_MV_REF_CANDIDATES candidates. + // Look first at spatial neighbours + if (index < (MAX_MV_REF_CANDIDATES - 1)) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) && + ((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) { + candidate_mi = here + mv_ref_search[i][0] + + (mv_ref_search[i][1] * xd->mode_info_stride); + + get_non_matching_candidates(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + if (c_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); addmv_and_shuffle(candidate_mvs, candidate_scores, - &index, c2_refmv, ref_weight); + &index, c_refmv, ref_distance_weight[i]); + } + + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, ref_distance_weight[i]); } } + + if (index >= (MAX_MV_REF_CANDIDATES - 1)) { + break; + } + } + } + // Look at the last frame + if (index < (MAX_MV_REF_CANDIDATES - 1)) { + candidate_mi = lf_here; + get_non_matching_candidates(candidate_mi, ref_frame, + &c_ref_frame, &c_refmv, + &c2_ref_frame, &c2_refmv); + + if (c_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias); + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c_refmv, 2); + } + + if (c2_ref_frame != INTRA_FRAME) { + scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias); + addmv_and_shuffle(candidate_mvs, candidate_scores, + &index, c2_refmv, 2); } } From 81d1171fd4614c3b60439b97007a3ec7ea5e3d0c Mon Sep 17 00:00:00 2001 From: Adrian Grange Date: Fri, 4 Jan 2013 09:00:47 -0800 Subject: [PATCH 44/77] Fix mode selection infinite loop bug Mode selection for SBs could enter an infinite loop because the interpolation filter mode index was not being reset correctly. Change-Id: I4bbe726f29ef5b6836e94884067c46084713cc11 --- vp9/encoder/vp9_rdopt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 282bf0c6a..774b577a0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -4605,6 +4605,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Test best rd so far against threshold for trying this mode. if (best_rd <= cpi->rd_threshes[mode_index] || cpi->rd_threshes[mode_index] == INT_MAX) { + switchable_filter_index = 0; continue; } From c3941665e995f12f9aa9b47a32c06d20978993fc Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 5 Jan 2013 18:20:25 -0800 Subject: [PATCH 45/77] 64x64 blocksize support. 3.2% gains on std/hd, 1.0% gains on hd. 
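A condensed sketch (not part of the patch) of the size dispatch this
change threads through the decoder; the real version, including the
inter and inter-intra branches and the CONFIG_SUPERBLOCKS /
CONFIG_SUPERBLOCKS64 guards, is skip_recon_mb() in vp9_decodframe.c
below. Only the helper and enum names come from the diff; the wrapper
itself is hypothetical.

    static void build_intra_by_size(MACROBLOCKD *xd) {
      switch (xd->mode_info_context->mbmi.sb_type) {
        case BLOCK_SIZE_SB64X64:   /* 64x64 superblock: 4x4 macroblocks */
          vp9_build_intra_predictors_sb64y_s(xd);
          vp9_build_intra_predictors_sb64uv_s(xd);
          break;
        case BLOCK_SIZE_SB32X32:   /* 32x32 superblock: 2x2 macroblocks */
          vp9_build_intra_predictors_sby_s(xd);
          vp9_build_intra_predictors_sbuv_s(xd);
          break;
        default:                   /* ordinary 16x16 macroblock */
          vp9_build_intra_predictors_mby_s(xd);
          vp9_build_intra_predictors_mbuv_s(xd);
          break;
      }
    }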
Change-Id: I481d5df23d8a4fc650a5bcba956554490b2bd200 --- configure | 1 + vp9/common/vp9_blockd.h | 21 +- vp9/common/vp9_common.h | 3 + vp9/common/vp9_findnearmv.c | 62 +- vp9/common/vp9_loopfilter.c | 8 +- vp9/common/vp9_mvref_common.c | 2 +- vp9/common/vp9_onyxc_int.h | 7 +- vp9/common/vp9_pred_common.c | 78 +- vp9/common/vp9_reconinter.c | 64 ++ vp9/common/vp9_reconinter.h | 7 + vp9/common/vp9_reconintra.c | 85 +- vp9/common/vp9_reconintra.h | 6 + vp9/common/vp9_reconintra4x4.c | 16 +- vp9/common/vp9_rtcd_defs.sh | 30 + vp9/decoder/vp9_decodemv.c | 98 +- vp9/decoder/vp9_decodframe.c | 579 +++++++---- vp9/encoder/vp9_bitstream.c | 895 ++++++++-------- vp9/encoder/vp9_block.h | 7 +- vp9/encoder/vp9_encodeframe.c | 1774 +++++++++++++++++++------------- vp9/encoder/vp9_mcomp.c | 4 +- vp9/encoder/vp9_onyx_if.c | 58 +- vp9/encoder/vp9_onyx_int.h | 12 +- vp9/encoder/vp9_rdopt.c | 688 ++++++++++--- vp9/encoder/vp9_rdopt.h | 21 +- vp9/encoder/vp9_sad_c.c | 67 ++ vp9/encoder/vp9_segmentation.c | 165 +-- vp9/encoder/vp9_variance_c.c | 74 ++ 27 files changed, 3059 insertions(+), 1773 deletions(-) diff --git a/configure b/configure index 1126ea86e..5ed688e2f 100755 --- a/configure +++ b/configure @@ -240,6 +240,7 @@ EXPERIMENT_LIST=" csm comp_intra_pred superblocks + superblocks64 pred_filter lossless subpelrefmv diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index c430ea2ae..9ca2b22e6 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -226,6 +226,16 @@ typedef enum { MAX_REF_FRAMES = 4 } MV_REFERENCE_FRAME; +#if CONFIG_SUPERBLOCKS +typedef enum { + BLOCK_SIZE_MB16X16 = 0, + BLOCK_SIZE_SB32X32 = 1, +#if CONFIG_SUPERBLOCKS64 + BLOCK_SIZE_SB64X64 = 2, +#endif +} BLOCK_SIZE_TYPE; +#endif + typedef struct { MB_PREDICTION_MODE mode, uv_mode; #if CONFIG_COMP_INTRA_PRED @@ -268,8 +278,8 @@ typedef struct { #if CONFIG_SUPERBLOCKS // FIXME need a SB array of 4 MB_MODE_INFOs that - // only needs one encoded_as_sb. - unsigned char encoded_as_sb; + // only needs one sb_type. 
+ BLOCK_SIZE_TYPE sb_type; #endif } MB_MODE_INFO; @@ -415,6 +425,7 @@ typedef struct macroblockd { DECLARE_ALIGNED(32, uint8_t, y_buf[22 * 32]); #endif + int sb_index; int mb_index; // Index of the MB in the SB (0..3) int q_index; @@ -519,7 +530,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; #if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks - if (xd->mode_info_context->mbmi.encoded_as_sb) + if (xd->mode_info_context->mbmi.sb_type) return tx_type; #endif if (xd->mode_info_context->mbmi.mode == B_PRED && @@ -576,7 +587,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; #if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks - if (xd->mode_info_context->mbmi.encoded_as_sb) + if (xd->mode_info_context->mbmi.sb_type) return tx_type; #endif if (xd->mode_info_context->mbmi.mode == I8X8_PRED && @@ -611,7 +622,7 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; #if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks - if (xd->mode_info_context->mbmi.encoded_as_sb) + if (xd->mode_info_context->mbmi.sb_type) return tx_type; #endif if (xd->mode_info_context->mbmi.mode < I8X8_PRED && diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index ee027585c..2e1ee4b1a 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -21,6 +21,9 @@ #define TRUE 1 #define FALSE 0 +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) + /* Only need this for fixed-size arrays, for structs just assign. */ #define vp9_copy(Dest, Src) { \ diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index b5d6bda4d..85982fc18 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -191,7 +191,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src, xd->dst.y_stride, &sse); score += sse; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { vp9_sub_pixel_variance16x2_c(above_ref + offset + 16, ref_y_stride, SP(this_mv.as_mv.col), @@ -199,6 +199,22 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src + 16, xd->dst.y_stride, &sse); score += sse; } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + vp9_sub_pixel_variance16x2_c(above_ref + offset + 32, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 32, xd->dst.y_stride, &sse); + score += sse; + vp9_sub_pixel_variance16x2_c(above_ref + offset + 48, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 48, xd->dst.y_stride, &sse); + score += sse; + } +#endif #endif } if (xd->left_available) { @@ -208,7 +224,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, left_src, xd->dst.y_stride, &sse); score += sse; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16, ref_y_stride, SP(this_mv.as_mv.col), @@ -217,6 +233,24 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, xd->dst.y_stride, &sse); score += sse; } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), 
+ left_src + xd->dst.y_stride * 32, + xd->dst.y_stride, &sse); + score += sse; + vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + left_src + xd->dst.y_stride * 48, + xd->dst.y_stride, &sse); + score += sse; + } +#endif #endif } #else @@ -230,22 +264,42 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += vp9_sad16x3(above_src, xd->dst.y_stride, above_ref + offset, ref_y_stride); #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { score += vp9_sad16x3(above_src + 16, xd->dst.y_stride, above_ref + offset + 16, ref_y_stride); } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + score += vp9_sad16x3(above_src + 32, xd->dst.y_stride, + above_ref + offset + 32, ref_y_stride); + score += vp9_sad16x3(above_src + 48, xd->dst.y_stride, + above_ref + offset + 48, ref_y_stride); + } +#endif #endif } if (xd->left_available) { score += vp9_sad3x16(left_src, xd->dst.y_stride, left_ref + offset, ref_y_stride); #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { score += vp9_sad3x16(left_src + xd->dst.y_stride * 16, xd->dst.y_stride, left_ref + offset + ref_y_stride * 16, ref_y_stride); } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + score += vp9_sad3x16(left_src + xd->dst.y_stride * 32, + xd->dst.y_stride, + left_ref + offset + ref_y_stride * 32, + ref_y_stride); + score += vp9_sad3x16(left_src + xd->dst.y_stride * 48, + xd->dst.y_stride, + left_ref + offset + ref_y_stride * 48, + ref_y_stride); + } +#endif #endif } #endif diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index a928a9268..5188aa47c 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -228,7 +228,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { if (mb_col > 0 #if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_col & 1) && mode_info_context->mbmi.sb_type && ((mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-1].mbmi.mb_skip_coeff) #if CONFIG_TX32X32 @@ -253,7 +253,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { /* don't apply across umv border */ if (mb_row > 0 #if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_row & 1) && mode_info_context->mbmi.sb_type && ((mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-mis].mbmi.mb_skip_coeff) #if CONFIG_TX32X32 @@ -277,7 +277,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { // FIXME: Not 8x8 aware if (mb_col > 0 #if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_col & 1) && mode_info_context->mbmi.sb_type && mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-1].mbmi.mb_skip_coeff) #endif @@ -292,7 +292,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { /* don't apply across umv border */ if (mb_row > 0 #if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_row & 1) && mode_info_context->mbmi.sb_type && mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff) #endif diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 4b576e895..bfdc1af32 100644 
--- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -237,7 +237,7 @@ void vp9_find_mv_refs( vpx_memset(candidate_scores, 0, sizeof(candidate_scores)); #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { + if (mbmi->sb_type) { mv_ref_search = sb_mv_ref_search; ref_distance_weight = sb_ref_distance_weight; } else { diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index d96e76c86..3b62dac81 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -229,7 +229,7 @@ typedef struct VP9Common { /* Y,U,V,Y2 */ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context[2]; /* (up to) 4 contexts "" */ + ENTROPY_CONTEXT_PLANES left_context[4]; /* (up to) 4 contexts "" */ /* keyframe block modes are predicted by their above, left neighbors */ @@ -248,7 +248,10 @@ typedef struct VP9Common { vp9_prob prob_last_coded; vp9_prob prob_gf_coded; #if CONFIG_SUPERBLOCKS - vp9_prob sb_coded; + vp9_prob sb32_coded; +#if CONFIG_SUPERBLOCKS64 + vp9_prob sb64_coded; +#endif // CONFIG_SUPERBLOCKS64 #endif // Context probabilities when using predictive coding of segment id diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index e8a3c4f5e..f2f35a3b4 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -9,6 +9,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_treecoder.h" @@ -230,13 +231,18 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, case PRED_SEG_ID: xd->mode_info_context->mbmi.seg_id_predicted = pred_flag; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[1].mbmi.seg_id_predicted = pred_flag; - if (xd->mb_to_bottom_edge >= 0) { - xd->mode_info_context[mis].mbmi.seg_id_predicted = pred_flag; - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[mis + 1].mbmi.seg_id_predicted = pred_flag; + if (xd->mode_info_context->mbmi.sb_type) { +#define sub(a, b) (b) < 0 ? 
(a) + (b) : (a) + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); + const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7); + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + xd->mode_info_context[y * mis + x].mbmi.seg_id_predicted = + pred_flag; + } } } #endif @@ -245,13 +251,16 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, case PRED_REF: xd->mode_info_context->mbmi.ref_predicted = pred_flag; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[1].mbmi.ref_predicted = pred_flag; - if (xd->mb_to_bottom_edge >= 0) { - xd->mode_info_context[mis].mbmi.ref_predicted = pred_flag; - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[mis + 1].mbmi.ref_predicted = pred_flag; + if (xd->mode_info_context->mbmi.sb_type) { + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); + const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7); + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + xd->mode_info_context[y * mis + x].mbmi.ref_predicted = pred_flag; + } } } #endif @@ -260,13 +269,16 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, case PRED_MBSKIP: xd->mode_info_context->mbmi.mb_skip_coeff = pred_flag; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[1].mbmi.mb_skip_coeff = pred_flag; - if (xd->mb_to_bottom_edge >= 0) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = pred_flag; - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = pred_flag; + if (xd->mode_info_context->mbmi.sb_type) { + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); + const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7); + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + xd->mode_info_context[y * mis + x].mbmi.mb_skip_coeff = pred_flag; + } } } #endif @@ -288,21 +300,25 @@ unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm, // Currently the prediction for the macroblock segment ID is // the value stored for this macroblock in the previous frame. 
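/* An illustrative aside, not from the patch: throughout this change the
 * span of a block in macroblock units is n_mbs = 1 << sb_type (1 for
 * 16x16, 2 for 32x32, 4 for 64x64), clipped at the frame edge with the
 * MIN() helper the patch adds to vp9_common.h. mb_span() is a
 * hypothetical name for that idiom: */
static int mb_span(BLOCK_SIZE_TYPE sb_type, int mbs_remaining) {
  const int n_mbs = 1 << sb_type;    /* 1, 2 or 4 macroblocks on a side */
  return MIN(n_mbs, mbs_remaining);  /* clip against the frame edge */
}
/* The prediction below applies exactly this idiom: it takes the minimum
 * segment id over the clipped n_mbs x n_mbs region the block covers. */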
#if CONFIG_SUPERBLOCKS - if (!xd->mode_info_context->mbmi.encoded_as_sb) { + if (!xd->mode_info_context->mbmi.sb_type) { #endif return cm->last_frame_seg_map[MbIndex]; #if CONFIG_SUPERBLOCKS } else { - int seg_id = cm->last_frame_seg_map[MbIndex]; - int mb_col = MbIndex % cm->mb_cols; - int mb_row = MbIndex / cm->mb_cols; - if (mb_col + 1 < cm->mb_cols) - seg_id = seg_id && cm->last_frame_seg_map[MbIndex + 1]; - if (mb_row + 1 < cm->mb_rows) { - seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols]; - if (mb_col + 1 < cm->mb_cols) - seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols + 1]; + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int mb_col = MbIndex % cm->mb_cols; + const int mb_row = MbIndex / cm->mb_cols; + const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col); + const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row); + int x, y; + unsigned seg_id = -1; + + for (y = mb_row; y < mb_row + y_mbs; y++) { + for (x = mb_col; x < mb_col + x_mbs; x++) { + seg_id = MIN(seg_id, cm->last_frame_seg_map[cm->mb_cols * y + x]); + } } + return seg_id; } #endif diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 01d332f79..c1d4a29c7 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -780,6 +780,70 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, } #endif } + +void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride) { + uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; + uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, + *v2 = x->second_pre.v_buffer; + int edge[4], n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); + + x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32; + x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32; + x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + } + + vp9_build_inter32x32_predictors_sb(x, + dst_y + y_idx * 32 * dst_ystride + x_idx * 32, + dst_u + y_idx * 16 * dst_uvstride + x_idx * 16, + dst_v + y_idx * 16 * dst_uvstride + x_idx * 16, + dst_ystride, dst_uvstride); + } + + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.y_buffer = y1; + x->pre.u_buffer = u1; + x->pre.v_buffer = v1; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2; + x->second_pre.u_buffer = u2; + x->second_pre.v_buffer = v2; + } + +#if CONFIG_COMP_INTERINTRA_PRED + if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v, + dst_ystride, dst_uvstride); + } +#endif +} #endif /* diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 
b104f835d..5e45b6879 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -54,6 +54,13 @@ extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_v, int dst_ystride, int dst_uvstride); + +extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride); #endif extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index b893df151..3fec98a01 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -254,7 +254,7 @@ void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride, int up_available, int left_available) { uint8_t *yabove_row = src - src_stride; - uint8_t yleft_col[32]; + uint8_t yleft_col[64]; uint8_t ytop_left = yabove_row[-1]; int r, c, i; @@ -271,15 +271,19 @@ void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride, int average = 0; int log2_bsize_minus_1; - assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32); + assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32 || + bsize == 64); if (bsize == 4) { log2_bsize_minus_1 = 1; } else if (bsize == 8) { log2_bsize_minus_1 = 2; } else if (bsize == 16) { log2_bsize_minus_1 = 3; - } else /* bsize == 32 */ { + } else if (bsize == 32) { log2_bsize_minus_1 = 4; + } else { + assert(bsize == 64); + log2_bsize_minus_1 = 5; } if (up_available || left_available) { @@ -517,16 +521,17 @@ static void combine_interintra(MB_PREDICTION_MODE mode, 71, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 67, 67, 67, 67, }; - int size_scale = (size == 32 ? 1 : + int size_scale = (size >= 32 ? 1 : size == 16 ? 2 : size == 8 ? 4 : 8); + int size_shift = size == 64 ? 
1 : 0; int i, j; switch (mode) { case V_PRED: for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = weights1d[i * size_scale]; + int scale = weights1d[i * size_scale >> size_shift]; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -539,7 +544,7 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = weights1d[j * size_scale]; + int scale = weights1d[j * size_scale >> size_shift]; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -553,8 +558,9 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = (weights2d[i * size_scale * 32 + j * size_scale] + - weights1d[i * size_scale]) >> 1; + int scale = (weights2d[(i * size_scale * 32 + + j * size_scale) >> size_shift] + + weights1d[i * size_scale >> size_shift]) >> 1; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -568,8 +574,9 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = (weights2d[i * size_scale * 32 + j * size_scale] + - weights1d[j * size_scale]) >> 1; + int scale = (weights2d[(i * size_scale * 32 + + j * size_scale) >> size_shift] + + weights1d[j * size_scale >> size_shift]) >> 1; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -582,7 +589,8 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = weights2d[i * size_scale * 32 + j * size_scale]; + int scale = weights2d[(i * size_scale * 32 + + j * size_scale) >> size_shift]; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -695,6 +703,47 @@ void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, vp9_build_interintra_32x32_predictors_sby(xd, ypred, ystride); vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride); } + +void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd, + uint8_t *ypred, + int ystride) { + uint8_t intrapredictor[4096]; + const int mode = xd->mode_info_context->mbmi.interintra_mode; + vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, + intrapredictor, 64, mode, 64, + xd->up_available, xd->left_available); + combine_interintra(xd->mode_info_context->mbmi.interintra_mode, + ypred, ystride, intrapredictor, 64, 64); +} + +void vp9_build_interintra_64x64_predictors_sbuv(MACROBLOCKD *xd, + uint8_t *upred, + uint8_t *vpred, + int uvstride) { + uint8_t uintrapredictor[1024]; + uint8_t vintrapredictor[1024]; + const int mode = xd->mode_info_context->mbmi.interintra_uv_mode; + vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, + uintrapredictor, 32, mode, 32, + xd->up_available, xd->left_available); + vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride, + vintrapredictor, 32, mode, 32, + xd->up_available, xd->left_available); + combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, + upred, uvstride, uintrapredictor, 32, 32); + combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, + vpred, uvstride, 
vintrapredictor, 32, 32); +} + +void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride) { + vp9_build_interintra_64x64_predictors_sby(xd, ypred, ystride); + vp9_build_interintra_64x64_predictors_sbuv(xd, upred, vpred, uvstride); +} #endif #endif @@ -719,6 +768,13 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { xd->mode_info_context->mbmi.mode, 32, xd->up_available, xd->left_available); } + +void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) { + vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + xd->mode_info_context->mbmi.mode, 64, + xd->up_available, xd->left_available); +} #endif #if CONFIG_COMP_INTRA_PRED @@ -778,6 +834,13 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { xd->mode_info_context->mbmi.uv_mode, 16); } + +void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { + vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, + xd->mode_info_context->mbmi.uv_mode, + 32); +} #endif #if CONFIG_COMP_INTRA_PRED diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index f3016dd79..7bdcb4ece 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -41,6 +41,12 @@ extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, uint8_t *vpred, int ystride, int uvstride); +extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride); #endif // CONFIG_SUPERBLOCKS #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index d61a515b8..c41b55bca 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -434,12 +434,9 @@ void vp9_comp_intra4x4_predict_c(BLOCKD *x, * to the right prediction have filled in pixels to use. */ void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) { - int extend_edge = (xd->mb_to_right_edge == 0 && xd->mb_index < 2); + int extend_edge = xd->mb_to_right_edge == 0 && xd->mb_index < 2; uint8_t *above_right = *(xd->block[0].base_dst) + xd->block[0].dst - xd->block[0].dst_stride + 16; - uint32_t *src_ptr = (uint32_t *) - (above_right - (xd->mb_index == 3 ? 
16 * xd->block[0].dst_stride : 0)); - uint32_t *dst_ptr0 = (uint32_t *)above_right; uint32_t *dst_ptr1 = (uint32_t *)(above_right + 4 * xd->block[0].dst_stride); @@ -448,6 +445,17 @@ void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) { uint32_t *dst_ptr3 = (uint32_t *)(above_right + 12 * xd->block[0].dst_stride); + uint32_t *src_ptr = (uint32_t *) above_right; + + if ((xd->sb_index >= 2 && xd->mb_to_right_edge == 0) || + (xd->sb_index == 3 && xd->mb_index & 1)) + src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 32 * + xd->block[0].dst_stride); + if (xd->mb_index == 3 || + (xd->mb_to_right_edge == 0 && xd->mb_index == 2)) + src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 16 * + xd->block[0].dst_stride); + if (extend_edge) { *src_ptr = ((uint8_t *) src_ptr)[-1] * 0x01010101U; } diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 9cf7121ba..f02ee0260 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -418,6 +418,9 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance32x32 +prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance64x64 + prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance16x16 mmx sse2 vp9_variance16x16_sse2=vp9_variance16x16_wmt @@ -443,6 +446,9 @@ specialize vp9_variance4x4 mmx sse2 vp9_variance4x4_sse2=vp9_variance4x4_wmt vp9_variance4x4_mmx=vp9_variance4x4_mmx +prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" +specialize vp9_sub_pixel_variance64x64 + prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" specialize vp9_sub_pixel_variance32x32 @@ -467,6 +473,9 @@ prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int so specialize vp9_sub_pixel_variance4x4 sse2 mmx vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt +prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp9_sad64x64 + prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad32x32 @@ -502,6 +511,15 @@ prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, specialize vp9_variance_halfpixvar16x16_hv mmx sse2 vp9_variance_halfpixvar16x16_hv_sse2=vp9_variance_halfpixvar16x16_hv_wmt +prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance_halfpixvar64x64_h + +prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance_halfpixvar64x64_v + +prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize 
vp9_variance_halfpixvar64x64_hv + prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance_halfpixvar32x32_h @@ -511,6 +529,9 @@ specialize vp9_variance_halfpixvar32x32_v prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance_halfpixvar32x32_hv +prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp9_sad64x64x3 + prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x3 @@ -529,6 +550,9 @@ specialize vp9_sad8x8x3 sse3 prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" specialize vp9_sad4x4x3 sse3 +prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" +specialize vp9_sad64x64x8 + prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" specialize vp9_sad32x32x8 @@ -547,6 +571,9 @@ specialize vp9_sad8x8x8 sse4 prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" specialize vp9_sad4x4x8 sse4 +prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp9_sad64x64x4d + prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x4d @@ -583,6 +610,9 @@ prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, specialize vp9_mse16x16 mmx sse2 vp9_mse16x16_sse2=vp9_mse16x16_wmt +prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" +specialize vp9_sub_pixel_mse64x64 + prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" specialize vp9_sub_pixel_mse32x32 diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index cbd3fb984..bbe2e953c 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -14,7 +14,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/common/vp9_findnearmv.h" - +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_entropy.h" @@ -122,7 +122,24 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.segment_id = 0; if (pbi->mb.update_mb_segmentation_map) { read_mb_segid(bc, &m->mbmi, &pbi->mb); - pbi->common.last_frame_seg_map[map_index] = m->mbmi.segment_id; +#if CONFIG_SUPERBLOCKS + if (m->mbmi.sb_type) { + const int nmbs = 1 << m->mbmi.sb_type; + const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); + const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + cm->last_frame_seg_map[map_index + x + y * cm->mb_cols] = + m->mbmi.segment_id; + } + } + } else 
+#endif + { + cm->last_frame_seg_map[map_index] = m->mbmi.segment_id; + } } m->mbmi.mb_skip_coeff = 0; @@ -145,7 +162,7 @@ static void kfread_modes(VP9D_COMP *pbi, } #if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { + if (m->mbmi.sb_type) { y_mode = (MB_PREDICTION_MODE) read_kf_sb_ymode(bc, pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]); } else @@ -212,12 +229,12 @@ static void kfread_modes(VP9D_COMP *pbi, if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) { m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.encoded_as_sb) + if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type) m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]); #endif } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.encoded_as_sb) { + } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.sb_type) { m->mbmi.txfm_size = TX_32X32; #endif } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { @@ -638,14 +655,17 @@ static void read_mb_segment_id(VP9D_COMP *pbi, read_mb_segid(bc, mbmi, xd); } #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { - cm->last_frame_seg_map[index] = mbmi->segment_id; - if (mb_col + 1 < cm->mb_cols) - cm->last_frame_seg_map[index + 1] = mbmi->segment_id; - if (mb_row + 1 < cm->mb_rows) { - cm->last_frame_seg_map[index + cm->mb_cols] = mbmi->segment_id; - if (mb_col + 1 < cm->mb_cols) - cm->last_frame_seg_map[index + cm->mb_cols + 1] = mbmi->segment_id; + if (mbmi->sb_type) { + const int nmbs = 1 << mbmi->sb_type; + const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); + const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + cm->last_frame_seg_map[index + x + y * cm->mb_cols] = + mbmi->segment_id; + } } } else #endif @@ -654,18 +674,21 @@ static void read_mb_segment_id(VP9D_COMP *pbi, } } else { #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { - mbmi->segment_id = cm->last_frame_seg_map[index]; - if (mb_col < cm->mb_cols - 1) - mbmi->segment_id = mbmi->segment_id && - cm->last_frame_seg_map[index + 1]; - if (mb_row < cm->mb_rows - 1) { - mbmi->segment_id = mbmi->segment_id && - cm->last_frame_seg_map[index + cm->mb_cols]; - if (mb_col < cm->mb_cols - 1) - mbmi->segment_id = mbmi->segment_id && - cm->last_frame_seg_map[index + cm->mb_cols + 1]; + if (mbmi->sb_type) { + const int nmbs = 1 << mbmi->sb_type; + const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); + const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); + unsigned segment_id = -1; + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + segment_id = MIN(segment_id, + cm->last_frame_seg_map[index + x + + y * cm->mb_cols]); + } } + mbmi->segment_id = segment_id; } else #endif { @@ -693,6 +716,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int mb_to_right_edge; int mb_to_top_edge; int mb_to_bottom_edge; +#if CONFIG_SUPERBLOCKS + const int mb_size = 1 << mi->mbmi.sb_type; +#else + const int mb_size = 1; +#endif mb_to_top_edge = xd->mb_to_top_edge; mb_to_bottom_edge = xd->mb_to_bottom_edge; @@ -707,18 +735,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, xd->mb_to_left_edge = mb_to_left_edge = -((mb_col * 16) << 3); mb_to_left_edge -= LEFT_TOP_MARGIN; - -#if CONFIG_SUPERBLOCKS - if (mi->mbmi.encoded_as_sb) { - xd->mb_to_right_edge = - mb_to_right_edge = ((pbi->common.mb_cols - 2 - mb_col) * 16) << 3; - } else { -#endif 
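/* Worked example (editorial aside, not in the patch): edge distances are
 * kept in 1/8th-pel units, so the unified computation below,
 *   mb_to_right_edge = ((mb_cols - mb_size - mb_col) * 16) << 3,
 * with mb_cols = 45, mb_col = 40 and mb_size = 4 (a 64x64 superblock)
 * gives (45 - 4 - 40) * 16 * 8 = 128, i.e. exactly one macroblock of
 * room to the right, before RIGHT_BOTTOM_MARGIN is added. */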
- xd->mb_to_right_edge = - mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3; -#if CONFIG_SUPERBLOCKS - } -#endif + xd->mb_to_right_edge = + mb_to_right_edge = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3; mb_to_right_edge += RIGHT_BOTTOM_MARGIN; // Make sure the MACROBLOCKD mode info pointer is pointed at the @@ -801,7 +819,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); } else { #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) + if (mbmi->sb_type) mbmi->mode = read_sb_mv_ref(bc, mv_ref_p); else #endif @@ -1155,7 +1173,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode = (MB_PREDICTION_MODE) vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); #if CONFIG_SUPERBLOCKS - } else if (mbmi->encoded_as_sb) { + } else if (mbmi->sb_type) { mbmi->mode = (MB_PREDICTION_MODE) read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob); pbi->common.fc.sb_ymode_counts[mbmi->mode]++; @@ -1232,12 +1250,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode != SPLITMV) { mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb && mbmi->txfm_size != TX_8X8) + if (mbmi->sb_type && mbmi->txfm_size != TX_8X8) mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]); #endif } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - } else if (mbmi->encoded_as_sb && cm->txfm_mode >= ALLOW_32X32) { + } else if (mbmi->sb_type && cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; #endif } else if (cm->txfm_mode >= ALLOW_16X16 && diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index af345824e..d524ade66 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -10,6 +10,7 @@ #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_header.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconintra4x4.h" @@ -172,55 +173,69 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + vp9_build_intra_predictors_sb64uv_s(xd); + vp9_build_intra_predictors_sb64y_s(xd); + } else +#endif // CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { vp9_build_intra_predictors_sbuv_s(xd); vp9_build_intra_predictors_sby_s(xd); - } else { -#endif - vp9_build_intra_predictors_mbuv_s(xd); - vp9_build_intra_predictors_mby_s(xd); -#if CONFIG_SUPERBLOCKS + } else +#endif // CONFIG_SUPERBLOCKS + { + vp9_build_intra_predictors_mbuv_s(xd); + vp9_build_intra_predictors_mby_s(xd); } -#endif } else { #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + vp9_build_inter64x64_predictors_sb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } else +#endif // CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } else { -#endif - 
vp9_build_1st_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_2nd_inter16x16_predictors_mb(xd, + } else +#endif // CONFIG_SUPERBLOCKS + { + vp9_build_1st_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } -#if CONFIG_COMP_INTERINTRA_PRED - else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_16x16_predictors_mb(xd, + + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + vp9_build_2nd_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } + } +#if CONFIG_COMP_INTERINTRA_PRED + else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } #endif -#if CONFIG_SUPERBLOCKS } -#endif } } @@ -546,8 +561,9 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, #if CONFIG_SUPERBLOCKS static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n) { - int x_idx = n & 1, y_idx = n >> 1; + BOOL_DECODER* const bc, int n, + int maska, int shiftb) { + int x_idx = n & maska, y_idx = n >> shiftb; TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_16x16_c( @@ -571,9 +587,10 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, }; static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n) { + BOOL_DECODER* const bc, int n, + int maska, int shiftb) { + int x_idx = n & maska, y_idx = n >> shiftb; BLOCKD *b = &xd->block[24]; - int x_idx = n & 1, y_idx = n >> 1; TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -632,9 +649,10 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, }; static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n) { + BOOL_DECODER* const bc, int n, + int maska, int shiftb) { + int x_idx = n & maska, y_idx = n >> shiftb; BLOCKD *b = &xd->block[24]; - int x_idx = n & 1, y_idx = n >> 1; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -687,16 +705,148 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.uv_stride, xd->eobs + 16, xd); }; -static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, unsigned int mb_col, - BOOL_DECODER* const bc) { +#if CONFIG_SUPERBLOCKS64 +static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, unsigned int mb_col, + BOOL_DECODER* const bc) { int i, n, eobtotal; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; MODE_INFO *orig_mi = xd->mode_info_context; const int mis = pc->mode_info_stride; - assert(xd->mode_info_context->mbmi.encoded_as_sb); + assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64); + + if (pbi->common.frame_type != KEY_FRAME) + vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); + + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(pbi, xd); + + if (xd->mode_info_context->mbmi.mb_skip_coeff) { + int n; + + vp9_reset_mb_tokens_context(xd); + for (n = 1; n <= 3; n++) { + if (mb_col < 
pc->mb_cols - n) + xd->above_context += n; + if (mb_row < pc->mb_rows - n) + xd->left_context += n; + vp9_reset_mb_tokens_context(xd); + if (mb_col < pc->mb_cols - n) + xd->above_context -= n; + if (mb_row < pc->mb_rows - n) + xd->left_context -= n; + } + + /* Special case: Force the loopfilter to skip when eobtotal and + * mb_skip_coeff are zero. + */ + skip_recon_mb(pbi, xd); + return; + } + + /* do prediction */ + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + vp9_build_intra_predictors_sb64y_s(xd); + vp9_build_intra_predictors_sb64uv_s(xd); + } else { + vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } + + /* dequantization and idct */ +#if CONFIG_TX32X32 + if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + if (mb_col + x_idx * 2 >= pc->mb_cols || + mb_row + y_idx * 2 >= pc->mb_rows) + continue; + + xd->left_context = pc->left_context + (y_idx << 1); + xd->above_context = pc->above_context + mb_col + (x_idx << 1); + xd->mode_info_context = orig_mi + x_idx * 2 + y_idx * 2 * mis; + eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; + if (mb_row + 1 < pc->mb_rows) { + xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; + } + } else { + vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + x_idx * 32 + + xd->dst.y_stride * y_idx * 32, + xd->dst.y_buffer + x_idx * 32 + + xd->dst.y_stride * y_idx * 32, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[0]); + vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer + x_idx * 16 + + xd->dst.uv_stride * y_idx * 16, + xd->dst.v_buffer + x_idx * 16 + + xd->dst.uv_stride * y_idx * 16, + xd->dst.uv_stride, xd->eobs + 16); + } + } + } else { +#endif + for (n = 0; n < 16; n++) { + int x_idx = n & 3, y_idx = n >> 2; + + if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) + continue; + + xd->above_context = pc->above_context + mb_col + x_idx; + xd->left_context = pc->left_context + y_idx; + xd->mode_info_context = orig_mi + x_idx + y_idx * mis; + for (i = 0; i < 25; i++) { + xd->block[i].eob = 0; + xd->eobs[i] = 0; + } + + eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + continue; + } + + if (tx_size == TX_16X16) { + decode_16x16_sb(pbi, xd, bc, n, 3, 2); + } else if (tx_size == TX_8X8) { + decode_8x8_sb(pbi, xd, bc, n, 3, 2); + } else { + decode_4x4_sb(pbi, xd, bc, n, 3, 2); + } + } +#if CONFIG_TX32X32 + } +#endif + + xd->above_context = pc->above_context + mb_col; + xd->left_context = pc->left_context; + xd->mode_info_context = orig_mi; +} +#endif // CONFIG_SUPERBLOCKS64 + +static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, unsigned int mb_col, + BOOL_DECODER* const bc) { + int i, n, eobtotal; + TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; + VP9_COMMON *const pc = &pbi->common; + MODE_INFO *orig_mi = xd->mode_info_context; + const int mis = pc->mode_info_stride; + + assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32); if (pbi->common.frame_type != KEY_FRAME) 
vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); @@ -767,7 +917,7 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; - xd->left_context = pc->left_context + y_idx; + xd->left_context = pc->left_context + y_idx + (mb_row & 2); xd->mode_info_context = orig_mi + x_idx + y_idx * mis; for (i = 0; i < 25; i++) { xd->block[i].eob = 0; @@ -781,16 +931,16 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } if (tx_size == TX_16X16) { - decode_16x16_sb(pbi, xd, bc, n); + decode_16x16_sb(pbi, xd, bc, n, 1, 1); } else if (tx_size == TX_8X8) { - decode_8x8_sb(pbi, xd, bc, n); + decode_8x8_sb(pbi, xd, bc, n, 1, 1); } else { - decode_4x4_sb(pbi, xd, bc, n); + decode_4x4_sb(pbi, xd, bc, n, 1, 1); } } xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context; + xd->left_context = pc->left_context + (mb_row & 2); xd->mode_info_context = orig_mi; #if CONFIG_TX32X32 } @@ -807,7 +957,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, int tx_size; #if CONFIG_SUPERBLOCKS - assert(!xd->mode_info_context->mbmi.encoded_as_sb); + assert(!xd->mode_info_context->mbmi.sb_type); #endif // re-initialize macroblock dequantizer before detokenization @@ -930,190 +1080,186 @@ static int get_delta_q(vp9_reader *bc, int prev, int *q_update) { FILE *vpxlog = 0; #endif +static void set_offsets(VP9D_COMP *pbi, int block_size, + int mb_row, int mb_col) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + const int mis = cm->mode_info_stride; + const int idx = mis * mb_row + mb_col; + const int dst_fb_idx = cm->new_fb_idx; + const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride; + const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride; + const int recon_yoffset = mb_row * 16 * recon_y_stride + 16 * mb_col; + const int recon_uvoffset = mb_row * 8 * recon_uv_stride + 8 * mb_col; + + xd->mode_info_context = cm->mi + idx; +#if CONFIG_SUPERBLOCKS + xd->mode_info_context->mbmi.sb_type = block_size >> 5; +#endif + xd->prev_mode_info_context = cm->prev_mi + idx; + xd->above_context = cm->above_context + mb_col; + xd->left_context = cm->left_context + (mb_row & 3); + + /* Distance of Mb to the various image edges. 
+ * These are specified to 8th pel as they are always compared to
+ * values that are in 1/8th pel units
+ */
+  block_size >>= 4;  // in mb units
+  xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+  xd->mb_to_left_edge = -((mb_col * 16) << 3);
+  xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
+  xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
+
+  xd->up_available = (mb_row != 0);
+  xd->left_available = (mb_col != 0);
+
+  xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+  xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+  xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+}
+
+static void set_refs(VP9D_COMP *pbi, int block_size,
+                     int mb_row, int mb_col) {
+  VP9_COMMON *const cm = &pbi->common;
+  MACROBLOCKD *const xd = &pbi->mb;
+  MODE_INFO *mi = xd->mode_info_context;
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+
+  if (mbmi->ref_frame > INTRA_FRAME) {
+    int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride;
+
+    /* Select the appropriate reference frame for this MB */
+    if (mbmi->ref_frame == LAST_FRAME)
+      ref_fb_idx = cm->lst_fb_idx;
+    else if (mbmi->ref_frame == GOLDEN_FRAME)
+      ref_fb_idx = cm->gld_fb_idx;
+    else
+      ref_fb_idx = cm->alt_fb_idx;
+
+    ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+    ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col;
+    xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset;
+    ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+    ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col;
+    xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset;
+    xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset;
+
+    /* propagate errors from reference frames */
+    xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted;
+
+    if (mbmi->second_ref_frame > INTRA_FRAME) {
+      int second_ref_fb_idx;
+
+      /* Select the appropriate reference frame for this MB */
+      if (mbmi->second_ref_frame == LAST_FRAME)
+        second_ref_fb_idx = cm->lst_fb_idx;
+      else if (mbmi->second_ref_frame == GOLDEN_FRAME)
+        second_ref_fb_idx = cm->gld_fb_idx;
+      else
+        second_ref_fb_idx = cm->alt_fb_idx;
+
+      xd->second_pre.y_buffer =
+          cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset;
+      xd->second_pre.u_buffer =
+          cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset;
+      xd->second_pre.v_buffer =
+          cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset;
+
+      /* propagate errors from reference frames */
+      xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted;
+    }
+  }
+
+#if CONFIG_SUPERBLOCKS
+  if (mbmi->sb_type) {
+    const int n_mbs = 1 << mbmi->sb_type;
+    const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
+    const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
+    const int mis = cm->mode_info_stride;
+    int x, y;
+
+    for (y = 0; y < y_mbs; y++) {
+      for (x = !y; x < x_mbs; x++) {
+        mi[y * mis + x] = *mi;
+      }
+    }
+  }
+#endif
+}
+
 /* Decode a row of Superblocks (2x2 region of MBs) */
-static void
-decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd,
-              BOOL_DECODER* const bc) {
-  int i;
-  int sb_col;
-  int mb_row, mb_col;
-  int recon_yoffset, recon_uvoffset;
-  int ref_fb_idx = pc->lst_fb_idx;
-  int dst_fb_idx = pc->new_fb_idx;
-  int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
-  int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
-  int row_delta[4] = { 0, +1, 0, -1};
-  int col_delta[4] = { +1, -1, +1, +1};
-  int sb_cols = (pc->mb_cols + 1) >> 1;
+static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
+                          int mb_row, MACROBLOCKD *xd,
+                          BOOL_DECODER* const bc) {
+  int mb_col;
 
   // For a SB there are 2 left contexts, each pertaining to a MB row within
   vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
-  mb_row = mbrow;
-  mb_col = 0;
-
-  for (sb_col = 0; sb_col < sb_cols; sb_col++) {
-    MODE_INFO *mi = xd->mode_info_context;
-
-#if CONFIG_SUPERBLOCKS
-    mi->mbmi.encoded_as_sb = vp9_read(bc, pc->sb_coded);
-#endif
-
-    // Process the 4 MBs within the SB in the order:
-    // top-left, top-right, bottom-left, bottom-right
-    for (i = 0; i < 4; i++) {
-      int dy = row_delta[i];
-      int dx = col_delta[i];
-      int offset_extended = dy * xd->mode_info_stride + dx;
-
-      xd->mb_index = i;
-
-      mi = xd->mode_info_context;
-      if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) {
-        // MB lies outside frame, skip on to next
-        mb_row += dy;
-        mb_col += dx;
-        xd->mode_info_context += offset_extended;
-        xd->prev_mode_info_context += offset_extended;
-        continue;
-      }
-#if CONFIG_SUPERBLOCKS
-      if (i)
-        mi->mbmi.encoded_as_sb = 0;
-#endif
-
-      // Set above context pointer
-      xd->above_context = pc->above_context + mb_col;
-      xd->left_context = pc->left_context + (i >> 1);
-
-      /* Distance of Mb to the various image edges.
-       * These are specified to 8th pel as they are always compared to
-       * values that are in 1/8th pel units
-       */
-      xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-      xd->mb_to_left_edge = -((mb_col * 16) << 3);
-#if CONFIG_SUPERBLOCKS
-      if (mi->mbmi.encoded_as_sb) {
-        xd->mb_to_bottom_edge = ((pc->mb_rows - 2 - mb_row) * 16) << 3;
-        xd->mb_to_right_edge = ((pc->mb_cols - 2 - mb_col) * 16) << 3;
-      } else {
-#endif
-        xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-#if CONFIG_SUPERBLOCKS
-      }
-#endif
-#ifdef DEC_DEBUG
-      dec_debug = (pbi->common.current_video_frame == 46 &&
-                   mb_row == 5 && mb_col == 2);
-      if (dec_debug)
-#if CONFIG_SUPERBLOCKS
-        printf("Enter Debug %d %d sb %d\n", mb_row, mb_col,
-               mi->mbmi.encoded_as_sb);
-#else
-        printf("Enter Debug %d %d\n", mb_row, mb_col);
-#endif
-#endif
-      xd->up_available = (mb_row != 0);
-      xd->left_available = (mb_col != 0);
-
-
-      recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
-      recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
-
-      xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-      xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-      xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
-
+  for (mb_col = 0; mb_col < pc->mb_cols; mb_col += 4) {
+#if CONFIG_SUPERBLOCKS64 && CONFIG_SUPERBLOCKS
+    if (vp9_read(bc, pc->sb64_coded)) {
+      set_offsets(pbi, 64, mb_row, mb_col);
       vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
+      set_refs(pbi, 64, mb_row, mb_col);
+      decode_superblock64(pbi, xd, mb_row, mb_col, bc);
+      xd->corrupted |= bool_error(bc);
+    } else
+#endif  // CONFIG_SUPERBLOCKS64
+    {
+      int j;
 
-      update_blockd_bmi(xd);
-#ifdef DEC_DEBUG
-      if (dec_debug)
-        printf("Hello\n");
-#endif
+      for (j = 0; j < 4; j++) {
+        const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2;
 
-      /* Select the appropriate reference frame for this MB */
-      if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
-        ref_fb_idx = pc->lst_fb_idx;
-      else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
-        ref_fb_idx = pc->gld_fb_idx;
-      else
-        ref_fb_idx = pc->alt_fb_idx;
+        if (mb_row + y_idx_sb >= pc->mb_rows ||
+            mb_col + x_idx_sb >= pc->mb_cols) {
+          // MB lies outside frame, skip on to next
+          continue;
+        }
 
-      xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
-      xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
-      xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
-
-      if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-        int second_ref_fb_idx;
-
-        /* Select the appropriate reference frame for this MB */
-        if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
-          second_ref_fb_idx = pc->lst_fb_idx;
-        else if (xd->mode_info_context->mbmi.second_ref_frame ==
-                 GOLDEN_FRAME)
-          second_ref_fb_idx = pc->gld_fb_idx;
-        else
-          second_ref_fb_idx = pc->alt_fb_idx;
-
-        xd->second_pre.y_buffer =
-          pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
-        xd->second_pre.u_buffer =
-          pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
-        xd->second_pre.v_buffer =
-          pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
-      }
-
-      if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
-        /* propagate errors from reference frames */
-        xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
-      }
+        xd->sb_index = j;
 
 #if CONFIG_SUPERBLOCKS
-      if (xd->mode_info_context->mbmi.encoded_as_sb) {
-        if (mb_col < pc->mb_cols - 1)
-          mi[1] = mi[0];
-        if (mb_row < pc->mb_rows - 1) {
-          mi[pc->mode_info_stride] = mi[0];
-          if (mb_col < pc->mb_cols - 1)
-            mi[pc->mode_info_stride + 1] = mi[0];
+        if (vp9_read(bc, pc->sb32_coded)) {
+          set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
+          vp9_decode_mb_mode_mv(pbi,
+                                xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
+          set_refs(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
+          decode_superblock32(pbi,
+                              xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
+          xd->corrupted |= bool_error(bc);
+        } else
+#endif  // CONFIG_SUPERBLOCKS
+        {
+          int i;
+
+          // Process the 4 MBs within the SB in the order:
+          // top-left, top-right, bottom-left, bottom-right
+          for (i = 0; i < 4; i++) {
+            const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1);
+
+            if (mb_row + y_idx >= pc->mb_rows ||
+                mb_col + x_idx >= pc->mb_cols) {
+              // MB lies outside frame, skip on to next
+              continue;
+            }
+
+            set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx);
+            xd->mb_index = i;
+            vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
+            update_blockd_bmi(xd);
+            set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx);
+            vp9_intra_prediction_down_copy(xd);
+            decode_macroblock(pbi, xd, mb_row, mb_col, bc);
+
+            /* check if the boolean decoder has suffered an error */
+            xd->corrupted |= bool_error(bc);
+          }
         }
       }
-      if (xd->mode_info_context->mbmi.encoded_as_sb) {
-        decode_superblock(pbi, xd, mb_row, mb_col, bc);
-      } else {
-#endif
-        vp9_intra_prediction_down_copy(xd);
-        decode_macroblock(pbi, xd, mb_row, mb_col, bc);
-#if CONFIG_SUPERBLOCKS
-      }
-#endif
-
-      /* check if the boolean decoder has suffered an error */
-      xd->corrupted |= bool_error(bc);
-
-#if CONFIG_SUPERBLOCKS
-      if (mi->mbmi.encoded_as_sb) {
-        assert(!i);
-        mb_col += 2;
-        xd->mode_info_context += 2;
-        xd->prev_mode_info_context += 2;
-        break;
-      }
-#endif
-
-      // skip to next MB
-      xd->mode_info_context += offset_extended;
-      xd->prev_mode_info_context += offset_extended;
-      mb_row += dy;
-      mb_col += dx;
     }
   }
-
-  /* skip prediction column */
-  xd->mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride;
-  xd->prev_mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride;
 }
 
 static unsigned int read_partition_size(const unsigned char *cx_size) {
@@ -1462,7 +1608,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
   }
 
 #if CONFIG_SUPERBLOCKS
-  pc->sb_coded = vp9_read_literal(&header_bc, 8);
+#if CONFIG_SUPERBLOCKS64
+  pc->sb64_coded = vp9_read_literal(&header_bc, 8);
+#endif
+  pc->sb32_coded = vp9_read_literal(&header_bc, 8);
 #endif
 
   /* Read the loop filter level and type */
@@ -1727,12 +1876,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
   vpx_memset(pc->above_context, 0,
             sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
 
-  // Resset the macroblock mode info context to the start of the list
-  xd->mode_info_context = pc->mi;
-  xd->prev_mode_info_context = pc->prev_mi;
-
   /* Decode a row of superblocks */
-  for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 2) {
+  for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) {
     decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
   }
   corrupt_tokens |= xd->corrupted;
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 956c16c48..a8fdc6626 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -562,19 +562,7 @@ static void write_mb_segid(vp9_writer *bc,
                            const MB_MODE_INFO *mi, const MACROBLOCKD *xd) {
   // Encode the MB segment id.
   int seg_id = mi->segment_id;
-#if CONFIG_SUPERBLOCKS
-  if (mi->encoded_as_sb) {
-    if (xd->mb_to_right_edge >= 0)
-      seg_id = seg_id && xd->mode_info_context[1].mbmi.segment_id;
-    if (xd->mb_to_bottom_edge >= 0) {
-      seg_id = seg_id &&
-               xd->mode_info_context[xd->mode_info_stride].mbmi.segment_id;
-      if (xd->mb_to_right_edge >= 0)
-        seg_id = seg_id &&
-                 xd->mode_info_context[xd->mode_info_stride + 1].mbmi.segment_id;
-    }
-  }
-#endif
+
   if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
     switch (seg_id) {
       case 0:
@@ -703,443 +691,364 @@ static void update_ref_probs(VP9_COMP *const cpi) {
   vp9_compute_mod_refprobs(cm);
 }
 
-static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) {
+static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
+                                vp9_writer *bc,
+                                int mb_rows_left, int mb_cols_left) {
   VP9_COMMON *const pc = &cpi->common;
   const nmv_context *nmvc = &pc->fc.nmvc;
-  MACROBLOCKD *xd = &cpi->mb.e_mbd;
-  MODE_INFO *m;
-  MODE_INFO *prev_m;
-  TOKENEXTRA *tok = cpi->tok;
-  TOKENEXTRA *tok_end = tok + cpi->tok_count;
-
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
   const int mis = pc->mode_info_stride;
-  int mb_row, mb_col;
-  int row, col;
-
-  // Values used in prediction model coding
-  vp9_prob pred_prob;
-  unsigned char prediction_flag;
-
-  int row_delta[4] = { 0, +1, 0, -1};
-  int col_delta[4] = { +1, -1, +1, +1};
-
-  cpi->mb.partition_info = cpi->mb.pi;
-
-  mb_row = 0;
-  for (row = 0; row < pc->mb_rows; row += 2) {
-    m = pc->mi + row * mis;
-    prev_m = pc->prev_mi + row * mis;
-
-    mb_col = 0;
-    for (col = 0; col < pc->mb_cols; col += 2) {
-      int i;
-
-      // Process the 4 MBs in the order:
-      // top-left, top-right, bottom-left, bottom-right
+  MB_MODE_INFO *const mi = &m->mbmi;
+  const MV_REFERENCE_FRAME rf = mi->ref_frame;
+  const MB_PREDICTION_MODE mode = mi->mode;
+  const int segment_id = mi->segment_id;
 #if CONFIG_SUPERBLOCKS
-      vp9_write(bc, m->mbmi.encoded_as_sb, pc->sb_coded);
-#endif
-      for (i = 0; i < 4; i++) {
-        MB_MODE_INFO *mi;
-        MV_REFERENCE_FRAME rf;
-        MV_REFERENCE_FRAME sec_ref_frame;
-        MB_PREDICTION_MODE mode;
-        int segment_id, skip_coeff;
-
-        int dy = row_delta[i];
-        int dx = col_delta[i];
-        int offset_extended = dy * mis + dx;
-
-        if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) {
-          // MB lies outside frame, move on
-          mb_row += dy;
-          mb_col += dx;
-          m += offset_extended;
-          prev_m += offset_extended;
-          cpi->mb.partition_info += offset_extended;
-          continue;
-        }
-
-        mi = &m->mbmi;
-        rf = mi->ref_frame;
-        sec_ref_frame = mi->second_ref_frame;
-        mode = mi->mode;
-        segment_id = mi->segment_id;
-
-        // Distance of Mb to the various image edges.
-        // These specified to 8th pel as they are always compared to MV
-        // values that are in 1/8th pel units
-        xd->mb_to_left_edge = -((mb_col * 16) << 3);
-        xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-
-#if CONFIG_SUPERBLOCKS
-        if (mi->encoded_as_sb) {
-          xd->mb_to_right_edge = ((pc->mb_cols - 2 - mb_col) * 16) << 3;
-          xd->mb_to_bottom_edge = ((pc->mb_rows - 2 - mb_row) * 16) << 3;
-        } else {
-#endif
-          xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-          xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-#if CONFIG_SUPERBLOCKS
-        }
-#endif
-
-        // Make sure the MacroBlockD mode info pointer is set correctly
-        xd->mode_info_context = m;
-        xd->prev_mode_info_context = prev_m;
-
-#ifdef ENTROPY_STATS
-        active_section = 9;
-#endif
-        if (cpi->mb.e_mbd.update_mb_segmentation_map) {
-          // Is temporal coding of the segment map enabled
-          if (pc->temporal_update) {
-            prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID);
-            pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID);
-
-            // Code the segment id prediction flag for this mb
-            vp9_write(bc, prediction_flag, pred_prob);
-
-            // If the mb segment id wasn't predicted code explicitly
-            if (!prediction_flag)
-              write_mb_segid(bc, mi, &cpi->mb.e_mbd);
-          } else {
-            // Normal unpredicted coding
-            write_mb_segid(bc, mi, &cpi->mb.e_mbd);
-          }
-        }
-
-        skip_coeff = 1;
-        if (pc->mb_no_coeff_skip &&
-            (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
-             (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) {
-          skip_coeff = mi->mb_skip_coeff;
-#if CONFIG_SUPERBLOCKS
-          if (mi->encoded_as_sb) {
-            skip_coeff &= m[1].mbmi.mb_skip_coeff;
-            skip_coeff &= m[mis].mbmi.mb_skip_coeff;
-            skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff;
-          }
-#endif
-          vp9_write(bc, skip_coeff,
-                    vp9_get_pred_prob(pc, xd, PRED_MBSKIP));
-        }
-
-        // Encode the reference frame.
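The old path above codes the per-MB segment id with an optional temporal prediction flag, and aggregates the skip flag over a fixed 2x2 superblock. A minimal standalone sketch of the prediction-flag idea, assuming made-up probabilities and a write_bit() stand-in for vp9_write() (neither is patch code):

#include <stdio.h>

/* write_bit() stands in for vp9_write(bc, bit, prob). */
static void write_bit(int bit, int prob) {
  printf("bit %d (prob %d/256)\n", bit, prob);
}

/* With temporal updates, one flag bit says "segment id predicted from
 * the previous frame"; the explicit id is only coded on a misprediction. */
static void code_segment_id(int temporal_update, int predicted_ok,
                            int seg_id, int pred_prob) {
  if (temporal_update) {
    write_bit(predicted_ok, pred_prob);
    if (!predicted_ok)
      printf("explicit seg_id %d\n", seg_id);  /* write_mb_segid() */
  } else {
    printf("explicit seg_id %d\n", seg_id);    /* normal unpredicted coding */
  }
}

int main(void) {
  code_segment_id(1, 1, 2, 200);  /* predicted correctly: one flag bit */
  code_segment_id(1, 0, 2, 200);  /* mispredicted: flag plus explicit id */
  return 0;
}

The reference-frame signalling that the old code emitted next continues below.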
-        if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)
-            || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) {
-          encode_ref_frame(bc, pc, xd, segment_id, rf);
-        } else {
-          assert(rf == INTRA_FRAME);
-        }
-
-        if (rf == INTRA_FRAME) {
-#ifdef ENTROPY_STATS
-          active_section = 6;
-#endif
-
-          if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
-#if CONFIG_SUPERBLOCKS
-            if (m->mbmi.encoded_as_sb)
-              write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
-            else
-#endif
-              write_ymode(bc, mode, pc->fc.ymode_prob);
-          }
-          if (mode == B_PRED) {
-            int j = 0;
-#if CONFIG_COMP_INTRA_PRED
-            int uses_second =
-                m->bmi[0].as_mode.second !=
-                (B_PREDICTION_MODE)(B_DC_PRED - 1);
-            vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB);
-#endif
-            do {
-#if CONFIG_COMP_INTRA_PRED
-              B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
-#endif
-              write_bmode(bc, m->bmi[j].as_mode.first,
-                          pc->fc.bmode_prob);
-#if CONFIG_COMP_INTRA_PRED
-              if (uses_second) {
-                write_bmode(bc, mode2, pc->fc.bmode_prob);
-              }
-#endif
-            } while (++j < 16);
-          }
-          if (mode == I8X8_PRED) {
-            write_i8x8_mode(bc, m->bmi[0].as_mode.first,
-                            pc->fc.i8x8_mode_prob);
-            write_i8x8_mode(bc, m->bmi[2].as_mode.first,
-                            pc->fc.i8x8_mode_prob);
-            write_i8x8_mode(bc, m->bmi[8].as_mode.first,
-                            pc->fc.i8x8_mode_prob);
-            write_i8x8_mode(bc, m->bmi[10].as_mode.first,
-                            pc->fc.i8x8_mode_prob);
-          } else {
-            write_uv_mode(bc, mi->uv_mode,
-                          pc->fc.uv_mode_prob[mode]);
-          }
-        } else {
-          vp9_prob mv_ref_p [VP9_MVREFS - 1];
-
-          vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]);
-
-
-// #ifdef ENTROPY_STATS
-#ifdef ENTROPY_STATS
-          accum_mv_refs(mode, ct);
-          active_section = 3;
-#endif
-
-          // Is the segment coding of mode enabled
-          if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
-#if CONFIG_SUPERBLOCKS
-            if (mi->encoded_as_sb) {
-              write_sb_mv_ref(bc, mode, mv_ref_p);
-            } else
-#endif
-            {
-              write_mv_ref(bc, mode, mv_ref_p);
-            }
-            vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
-          }
-
-#if CONFIG_PRED_FILTER
-          // Is the prediction filter enabled
-          if (mode >= NEARESTMV && mode < SPLITMV) {
-            if (cpi->common.pred_filter_mode == 2)
-              vp9_write(bc, mi->pred_filter_enabled,
-                        pc->prob_pred_filter_off);
-            else
-              assert(mi->pred_filter_enabled ==
-                     cpi->common.pred_filter_mode);
-          }
-#endif
-          if (mode >= NEARESTMV && mode <= SPLITMV)
-          {
-            if (cpi->common.mcomp_filter_type == SWITCHABLE) {
-              write_token(bc, vp9_switchable_interp_tree,
-                          vp9_get_pred_probs(&cpi->common, xd,
-                                             PRED_SWITCHABLE_INTERP),
-                          vp9_switchable_interp_encodings +
-                          vp9_switchable_interp_map[mi->interp_filter]);
-            } else {
-              assert (mi->interp_filter ==
-                      cpi->common.mcomp_filter_type);
-            }
-          }
-
-          // does the feature use compound prediction or not
-          // (if not specified at the frame/segment level)
-          if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
-            vp9_write(bc, mi->second_ref_frame > INTRA_FRAME,
-                      vp9_get_pred_prob(pc, xd, PRED_COMP));
-          }
-#if CONFIG_COMP_INTERINTRA_PRED
-          if (cpi->common.use_interintra &&
-              mode >= NEARESTMV && mode < SPLITMV &&
-              mi->second_ref_frame <= INTRA_FRAME) {
-            vp9_write(bc, mi->second_ref_frame == INTRA_FRAME,
-                      pc->fc.interintra_prob);
-            // if (!cpi->dummy_packing)
-            //   printf("-- %d (%d)\n", mi->second_ref_frame == INTRA_FRAME,
-            //          pc->fc.interintra_prob);
-            if (mi->second_ref_frame == INTRA_FRAME) {
-              // if (!cpi->dummy_packing)
-              //   printf("** %d %d\n", mi->interintra_mode,
-              //          mi->interintra_uv_mode);
-              write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob);
 #if SEPARATE_INTERINTRA_UV
-              write_uv_mode(bc, mi->interintra_uv_mode,
-                            pc->fc.uv_mode_prob[mi->interintra_mode]);
-#endif
-            }
-          }
-#endif
-
-#if CONFIG_NEW_MVREF
-          // if ((mode == NEWMV) || (mode == SPLITMV)) {
-          if (mode == NEWMV) {
-            // Encode the index of the choice.
-            vp9_write_mv_ref_id(bc,
-                                xd->mb_mv_ref_probs[rf], mi->best_index);
-
-            if (mi->second_ref_frame > 0) {
-              // Encode the index of the choice.
-              vp9_write_mv_ref_id(
-                  bc, xd->mb_mv_ref_probs[mi->second_ref_frame],
-                  mi->best_second_index);
-            }
-          }
-#endif
-          {
-            switch (mode) { /* new, split require MVs */
-              case NEWMV:
-#ifdef ENTROPY_STATS
-                active_section = 5;
-#endif
-                write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv,
-                          (const nmv_context*) nmvc,
-                          xd->allow_high_precision_mv);
-
-                if (mi->second_ref_frame > 0) {
-                  write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv,
-                            (const nmv_context*) nmvc,
-                            xd->allow_high_precision_mv);
-                }
-                break;
-              case SPLITMV: {
-                int j = 0;
-
-#ifdef MODE_STATS
-                ++count_mb_seg [mi->partitioning];
-#endif
-
-                write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob);
-                cpi->mbsplit_count[mi->partitioning]++;
-
-                do {
-                  B_PREDICTION_MODE blockmode;
-                  int_mv blockmv;
-                  const int *const L =
-                      vp9_mbsplits [mi->partitioning];
-                  int k = -1;  /* first block in subset j */
-                  int mv_contz;
-                  int_mv leftmv, abovemv;
-
-                  blockmode = cpi->mb.partition_info->bmi[j].mode;
-                  blockmv = cpi->mb.partition_info->bmi[j].mv;
 #if CONFIG_DEBUG
-                  while (j != L[++k])
-                    if (k >= 16)
-                      assert(0);
+  const int mb_size = 1 << mi->sb_type;
 #else
-                  while (j != L[++k]);
+  const int mb_size = 1;
 #endif
-                  leftmv.as_int = left_block_mv(m, k);
-                  abovemv.as_int = above_block_mv(m, k, mis);
-                  mv_contz = vp9_mv_cont(&leftmv, &abovemv);
+  int skip_coeff;
 
-                  write_sub_mv_ref(bc, blockmode,
-                                   cpi->common.fc.sub_mv_ref_prob [mv_contz]);
-                  cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++;
-                  if (blockmode == NEW4X4) {
-#ifdef ENTROPY_STATS
-                    active_section = 11;
-#endif
-                    write_nmv(bc, &blockmv.as_mv, &mi->best_mv,
-                              (const nmv_context*) nmvc,
-                              xd->allow_high_precision_mv);
+  int mb_row = pc->mb_rows - mb_rows_left;
+  int mb_col = pc->mb_cols - mb_cols_left;
+  xd->prev_mode_info_context = pc->prev_mi + (m - pc->mi);
+  x->partition_info = x->pi + (m - pc->mi);
 
-                    if (mi->second_ref_frame > 0) {
-                      write_nmv(bc,
-                                &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
-                                &mi->best_second_mv,
-                                (const nmv_context*) nmvc,
-                                xd->allow_high_precision_mv);
-                    }
-                  }
-                } while (++j < cpi->mb.partition_info->count);
-              }
-              break;
-              default:
-                break;
-            }
-          }
-        }
-
-        if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
-             (rf != INTRA_FRAME && !(mode == SPLITMV &&
-                                     mi->partitioning == PARTITIONING_4X4))) &&
-            pc->txfm_mode == TX_MODE_SELECT &&
-            !((pc->mb_no_coeff_skip && skip_coeff) ||
-              (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
-               vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
-          TX_SIZE sz = mi->txfm_size;
-          // FIXME(rbultje) code ternary symbol once all experiments are merged
-          vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
-          if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
-            vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
 #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
-            if (mi->encoded_as_sb && sz != TX_8X8)
-              vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
 #endif
-          }
-        }
+  // Distance of Mb to the various image edges.
+  // These are specified to 8th pel as they are always compared to MV
+  // values that are in 1/8th pel units
+  xd->mb_to_left_edge = -((mb_col * 16) << 3);
+  xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+  xd->mb_to_right_edge = ((pc->mb_cols - mb_size - mb_col) * 16) << 3;
+  xd->mb_to_bottom_edge = ((pc->mb_rows - mb_size - mb_row) * 16) << 3;
 
 #ifdef ENTROPY_STATS
-        active_section = 1;
-#endif
-        assert(tok < tok_end);
-        pack_mb_tokens(bc, &tok, tok_end);
-
-#if CONFIG_SUPERBLOCKS
-        if (m->mbmi.encoded_as_sb) {
-          assert(!i);
-          mb_col += 2;
-          m += 2;
-          cpi->mb.partition_info += 2;
-          break;
-        }
+  active_section = 9;
 #endif
 
-        // Next MB
-        mb_row += dy;
-        mb_col += dx;
-        m += offset_extended;
-        prev_m += offset_extended;
-        cpi->mb.partition_info += offset_extended;
-#if CONFIG_DEBUG
-        assert((prev_m - cpi->common.prev_mip) == (m - cpi->common.mip));
-        assert((prev_m - cpi->common.prev_mi) == (m - cpi->common.mi));
-#endif
+  if (cpi->mb.e_mbd.update_mb_segmentation_map) {
+    // Is temporal coding of the segment map enabled
+    if (pc->temporal_update) {
+      unsigned char prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID);
+      vp9_prob pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID);
+
+      // Code the segment id prediction flag for this mb
+      vp9_write(bc, prediction_flag, pred_prob);
+
+      // If the mb segment id wasn't predicted code explicitly
+      if (!prediction_flag)
+        write_mb_segid(bc, mi, &cpi->mb.e_mbd);
+    } else {
+      // Normal unpredicted coding
+      write_mb_segid(bc, mi, &cpi->mb.e_mbd);
+    }
+  }
+
+  if (!pc->mb_no_coeff_skip) {
+    skip_coeff = 0;
+  } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+             vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
+    skip_coeff = 1;
+  } else {
+    const int nmbs = mb_size;
+    const int xmbs = MIN(nmbs, mb_cols_left);
+    const int ymbs = MIN(nmbs, mb_rows_left);
+    int x, y;
+
+    skip_coeff = 1;
+    for (y = 0; y < ymbs; y++) {
+      for (x = 0; x < xmbs; x++) {
+        skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
       }
     }
-    // Next SB
-    mb_row += 2;
-    m += mis + (1 - (pc->mb_cols & 0x1));
-    prev_m += mis + (1 - (pc->mb_cols & 0x1));
-    cpi->mb.partition_info += mis + (1 - (pc->mb_cols & 0x1));
+
+    vp9_write(bc, skip_coeff,
+              vp9_get_pred_prob(pc, xd, PRED_MBSKIP));
+  }
+
+  // Encode the reference frame.
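The skip_coeff block just above replaces the removed 2x2-specific aggregation: the skip flag is now ANDed over however many MBs the block spans, clamped at the frame edge. A standalone restatement of that loop, with an illustrative helper name (sb_skip_coeff is not a function in the patch):

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* AND the per-MB skip flags over the nmbs x nmbs area a superblock
 * covers, clamped against the bottom/right frame edges. */
static int sb_skip_coeff(const int *skip, int stride, int nmbs,
                         int mb_rows_left, int mb_cols_left) {
  const int ymbs = MIN(nmbs, mb_rows_left);
  const int xmbs = MIN(nmbs, mb_cols_left);
  int x, y, skip_coeff = 1;

  for (y = 0; y < ymbs; y++)
    for (x = 0; x < xmbs; x++)
      skip_coeff = skip_coeff && skip[y * stride + x];
  return skip_coeff;
}

int main(void) {
  /* 2x2 superblock in a frame corner: only the one in-frame MB counts. */
  const int skip[4] = { 1, 0, 0, 0 };
  printf("%d\n", sb_skip_coeff(skip, 2, 2, 1, 1));  /* prints 1 */
  return 0;
}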
+  if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)
+      || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) {
+    encode_ref_frame(bc, pc, xd, segment_id, rf);
+  } else {
+    assert(rf == INTRA_FRAME);
+  }
+
+  if (rf == INTRA_FRAME) {
+#ifdef ENTROPY_STATS
+    active_section = 6;
+#endif
+
+    if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+#if CONFIG_SUPERBLOCKS
+      if (m->mbmi.sb_type)
+        write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
+      else
+#endif
+        write_ymode(bc, mode, pc->fc.ymode_prob);
+    }
+    if (mode == B_PRED) {
+      int j = 0;
+#if CONFIG_COMP_INTRA_PRED
+      int uses_second =
+          m->bmi[0].as_mode.second !=
+          (B_PREDICTION_MODE)(B_DC_PRED - 1);
+      vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB);
+#endif
+      do {
+#if CONFIG_COMP_INTRA_PRED
+        B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second;
+#endif
+        write_bmode(bc, m->bmi[j].as_mode.first,
+                    pc->fc.bmode_prob);
+#if CONFIG_COMP_INTRA_PRED
+        if (uses_second) {
+          write_bmode(bc, mode2, pc->fc.bmode_prob);
+        }
+#endif
+      } while (++j < 16);
+    }
+    if (mode == I8X8_PRED) {
+      write_i8x8_mode(bc, m->bmi[0].as_mode.first,
+                      pc->fc.i8x8_mode_prob);
+      write_i8x8_mode(bc, m->bmi[2].as_mode.first,
+                      pc->fc.i8x8_mode_prob);
+      write_i8x8_mode(bc, m->bmi[8].as_mode.first,
+                      pc->fc.i8x8_mode_prob);
+      write_i8x8_mode(bc, m->bmi[10].as_mode.first,
+                      pc->fc.i8x8_mode_prob);
+    } else {
+      write_uv_mode(bc, mi->uv_mode,
+                    pc->fc.uv_mode_prob[mode]);
+    }
+  } else {
+    vp9_prob mv_ref_p[VP9_MVREFS - 1];
+
+    vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]);
+
+    // #ifdef ENTROPY_STATS
+#ifdef ENTROPY_STATS
+    accum_mv_refs(mode, ct);
+    active_section = 3;
+#endif
+
+    // Is the segment coding of mode enabled
+    if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+#if CONFIG_SUPERBLOCKS
+      if (mi->sb_type) {
+        write_sb_mv_ref(bc, mode, mv_ref_p);
+      } else
+#endif
+      {
+        write_mv_ref(bc, mode, mv_ref_p);
+      }
+      vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
+    }
+
+#if CONFIG_PRED_FILTER
+    // Is the prediction filter enabled
+    if (mode >= NEARESTMV && mode < SPLITMV) {
+      if (cpi->common.pred_filter_mode == 2)
+        vp9_write(bc, mi->pred_filter_enabled,
+                  pc->prob_pred_filter_off);
+      else
+        assert(mi->pred_filter_enabled ==
+               cpi->common.pred_filter_mode);
+    }
+#endif
+    if (mode >= NEARESTMV && mode <= SPLITMV) {
+      if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+        write_token(bc, vp9_switchable_interp_tree,
+                    vp9_get_pred_probs(&cpi->common, xd,
+                                       PRED_SWITCHABLE_INTERP),
+                    vp9_switchable_interp_encodings +
+                    vp9_switchable_interp_map[mi->interp_filter]);
+      } else {
+        assert(mi->interp_filter == cpi->common.mcomp_filter_type);
+      }
+    }
+
+    // does the feature use compound prediction or not
+    // (if not specified at the frame/segment level)
+    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+      vp9_write(bc, mi->second_ref_frame > INTRA_FRAME,
+                vp9_get_pred_prob(pc, xd, PRED_COMP));
+    }
+#if CONFIG_COMP_INTERINTRA_PRED
+    if (cpi->common.use_interintra &&
+        mode >= NEARESTMV && mode < SPLITMV &&
+        mi->second_ref_frame <= INTRA_FRAME) {
+      vp9_write(bc, mi->second_ref_frame == INTRA_FRAME,
+                pc->fc.interintra_prob);
+      // if (!cpi->dummy_packing)
+      //   printf("-- %d (%d)\n", mi->second_ref_frame == INTRA_FRAME,
+      //          pc->fc.interintra_prob);
+      if (mi->second_ref_frame == INTRA_FRAME) {
+        // if (!cpi->dummy_packing)
+        //   printf("** %d %d\n", mi->interintra_mode,
+        //          mi->interintra_uv_mode);
+        write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob);
+#if SEPARATE_INTERINTRA_UV
+        write_uv_mode(bc, mi->interintra_uv_mode,
+                      pc->fc.uv_mode_prob[mi->interintra_mode]);
+#endif
+      }
+    }
+#endif
+
+#if CONFIG_NEW_MVREF
+    // if ((mode == NEWMV) || (mode == SPLITMV)) {
+    if (mode == NEWMV) {
+      // Encode the index of the choice.
+      vp9_write_mv_ref_id(bc,
+                          xd->mb_mv_ref_probs[rf], mi->best_index);
+
+      if (mi->second_ref_frame > 0) {
+        // Encode the index of the choice.
+        vp9_write_mv_ref_id(
+            bc, xd->mb_mv_ref_probs[mi->second_ref_frame],
+            mi->best_second_index);
+      }
+    }
+#endif
+
+    switch (mode) { /* new, split require MVs */
+      case NEWMV:
+#ifdef ENTROPY_STATS
+        active_section = 5;
+#endif
+        write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv,
+                  (const nmv_context*) nmvc,
+                  xd->allow_high_precision_mv);
+
+        if (mi->second_ref_frame > 0) {
+          write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv,
+                    (const nmv_context*) nmvc,
+                    xd->allow_high_precision_mv);
+        }
+        break;
+      case SPLITMV: {
+        int j = 0;
+
+#ifdef MODE_STATS
+        ++count_mb_seg[mi->partitioning];
+#endif
+
+        write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob);
+        cpi->mbsplit_count[mi->partitioning]++;
+
+        do {
+          B_PREDICTION_MODE blockmode;
+          int_mv blockmv;
+          const int *const L = vp9_mbsplits[mi->partitioning];
+          int k = -1;  /* first block in subset j */
+          int mv_contz;
+          int_mv leftmv, abovemv;
+
+          blockmode = cpi->mb.partition_info->bmi[j].mode;
+          blockmv = cpi->mb.partition_info->bmi[j].mv;
+#if CONFIG_DEBUG
+          while (j != L[++k])
+            if (k >= 16)
+              assert(0);
+#else
+          while (j != L[++k]);
+#endif
+          leftmv.as_int = left_block_mv(m, k);
+          abovemv.as_int = above_block_mv(m, k, mis);
+          mv_contz = vp9_mv_cont(&leftmv, &abovemv);
+
+          write_sub_mv_ref(bc, blockmode,
+                           cpi->common.fc.sub_mv_ref_prob[mv_contz]);
+          cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++;
+          if (blockmode == NEW4X4) {
+#ifdef ENTROPY_STATS
+            active_section = 11;
+#endif
+            write_nmv(bc, &blockmv.as_mv, &mi->best_mv,
+                      (const nmv_context*) nmvc,
+                      xd->allow_high_precision_mv);
+
+            if (mi->second_ref_frame > 0) {
+              write_nmv(bc,
+                        &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
+                        &mi->best_second_mv,
+                        (const nmv_context*) nmvc,
+                        xd->allow_high_precision_mv);
+            }
+          }
+        } while (++j < cpi->mb.partition_info->count);
+        break;
+      }
+      default:
+        break;
+    }
+  }
+
+  if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
+       (rf != INTRA_FRAME && !(mode == SPLITMV &&
+                               mi->partitioning == PARTITIONING_4X4))) &&
+      pc->txfm_mode == TX_MODE_SELECT &&
+      !((pc->mb_no_coeff_skip && skip_coeff) ||
+        (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+         vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+    TX_SIZE sz = mi->txfm_size;
+    // FIXME(rbultje) code ternary symbol once all experiments are merged
+    vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
+    if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
+      vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
+#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+      if (mi->sb_type && sz != TX_8X8)
+        vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
+#endif
+    }
+  }
+}
 
-static void write_mb_modes_kf(const VP9_COMMON *c,
-                              const MACROBLOCKD *xd,
-                              const MODE_INFO *m,
-                              int mode_info_stride,
-                              vp9_writer *const bc) {
-  int ym;
-  int segment_id;
-
-  ym = m->mbmi.mode;
-  segment_id = m->mbmi.segment_id;
+static void write_mb_modes_kf(const VP9_COMP *cpi,
+                              const MODE_INFO *m,
+                              vp9_writer *bc,
+                              int mb_rows_left, int mb_cols_left) {
+  const VP9_COMMON *const c = &cpi->common;
+  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  const int mis = c->mode_info_stride;
+  const int ym = m->mbmi.mode;
+  const int segment_id = m->mbmi.segment_id;
+  int skip_coeff;
 
   if (xd->update_mb_segmentation_map) {
     write_mb_segid(bc, &m->mbmi, xd);
   }
 
-  if (c->mb_no_coeff_skip &&
-      (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
-       (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) {
-    int skip_coeff = m->mbmi.mb_skip_coeff;
+  if (!c->mb_no_coeff_skip) {
+    skip_coeff = 0;
+  } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+             vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
+    skip_coeff = 1;
+  } else {
 #if CONFIG_SUPERBLOCKS
-    const int mis = mode_info_stride;
-    if (m->mbmi.encoded_as_sb) {
-      skip_coeff &= m[1].mbmi.mb_skip_coeff;
-      skip_coeff &= m[mis].mbmi.mb_skip_coeff;
-      skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff;
-    }
+    const int nmbs = 1 << m->mbmi.sb_type;
+#else
+    const int nmbs = 1;
 #endif
-    vp9_write(bc, skip_coeff,
-              vp9_get_pred_prob(c, xd, PRED_MBSKIP));
+    const int xmbs = MIN(nmbs, mb_cols_left);
+    const int ymbs = MIN(nmbs, mb_rows_left);
+    int x, y;
+
+    skip_coeff = 1;
+    for (y = 0; y < ymbs; y++) {
+      for (x = 0; x < xmbs; x++) {
+        skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
+      }
+    }
+
+    vp9_write(bc, skip_coeff,
+              vp9_get_pred_prob(c, xd, PRED_MBSKIP));
   }
 
 #if CONFIG_SUPERBLOCKS
-  if (m->mbmi.encoded_as_sb) {
+  if (m->mbmi.sb_type) {
     sb_kfwrite_ymode(bc, ym,
                      c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
   } else
@@ -1150,7 +1059,6 @@ static void write_mb_modes_kf(const VP9_COMMON *c,
   }
 
   if (ym == B_PRED) {
-    const int mis = c->mode_info_stride;
     int i = 0;
 #if CONFIG_COMP_INTRA_PRED
     int uses_second =
@@ -1195,7 +1103,7 @@ static void write_mb_modes_kf(const VP9_COMMON *c,
     write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
 
   if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
-      !((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) ||
+      !((c->mb_no_coeff_skip && skip_coeff) ||
        (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
          vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
     TX_SIZE sz = m->mbmi.txfm_size;
@@ -1204,75 +1112,99 @@ static void write_mb_modes_kf(const VP9_COMMON *c,
     if (sz != TX_4X4 && ym <= TM_PRED) {
       vp9_write(bc, sz != TX_8X8, c->prob_tx[1]);
 #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
-      if (m->mbmi.encoded_as_sb && sz != TX_8X8)
+      if (m->mbmi.sb_type && sz != TX_8X8)
         vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
 #endif
     }
   }
 }
 
-static void write_kfmodes(VP9_COMP* const cpi, vp9_writer* const bc) {
+static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
+                          TOKENEXTRA **tok, TOKENEXTRA *tok_end,
+                          int mb_row, int mb_col) {
+  VP9_COMMON *const c = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
+  xd->mode_info_context = m;
+  if (c->frame_type == KEY_FRAME) {
+    write_mb_modes_kf(cpi, m, bc,
+                      c->mb_rows - mb_row, c->mb_cols - mb_col);
+#ifdef ENTROPY_STATS
+    active_section = 8;
+#endif
+  } else {
+    pack_inter_mode_mvs(cpi, m, bc,
+                        c->mb_rows - mb_row, c->mb_cols - mb_col);
+#ifdef ENTROPY_STATS
    active_section = 1;
+#endif
+  }
+
+  assert(*tok < tok_end);
+  pack_mb_tokens(bc, tok, tok_end);
+}
+
+static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
   VP9_COMMON *const c = &cpi->common;
   const int mis = c->mode_info_stride;
-  MACROBLOCKD *xd = &cpi->mb.e_mbd;
-  MODE_INFO *m;
-  int i;
-  int row, col;
-  int mb_row, mb_col;
-  int row_delta[4] = { 0, +1, 0, -1};
-  int col_delta[4] = { +1, -1, +1, +1};
+  MODE_INFO *m, *m_ptr = c->mi;
+  int i, mb_row, mb_col;
   TOKENEXTRA *tok = cpi->tok;
   TOKENEXTRA *tok_end = tok + cpi->tok_count;
 
-  mb_row = 0;
-  for (row = 0; row < c->mb_rows; row += 2) {
-    m = c->mi + row * mis;
+  for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) {
+    m = m_ptr;
+    for (mb_col = 0; mb_col < c->mb_cols; mb_col += 4, m += 4) {
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+      vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded);
+      if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+        write_modes_b(cpi, m, bc, &tok, tok_end, mb_row, mb_col);
+      } else
+#endif
+      {
+        int j;
 
-    mb_col = 0;
-    for (col = 0; col < c->mb_cols; col += 2) {
+        for (j = 0; j < 4; j++) {
+          const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2;
 #if CONFIG_SUPERBLOCKS
-      vp9_write(bc, m->mbmi.encoded_as_sb, c->sb_coded);
+          MODE_INFO *sb_m = m + y_idx_sb * mis + x_idx_sb;
 #endif
-      // Process the 4 MBs in the order:
-      // top-left, top-right, bottom-left, bottom-right
-      for (i = 0; i < 4; i++) {
-        int dy = row_delta[i];
-        int dx = col_delta[i];
-        int offset_extended = dy * mis + dx;
 
-        if ((mb_row >= c->mb_rows) || (mb_col >= c->mb_cols)) {
-          // MB lies outside frame, move on
-          mb_row += dy;
-          mb_col += dx;
-          m += offset_extended;
-          continue;
-        }
-
-        // Make sure the MacroBlockD mode info pointer is set correctly
-        xd->mode_info_context = m;
-
-        write_mb_modes_kf(c, xd, m, mis, bc);
-#ifdef ENTROPY_STATS
-        active_section = 8;
-#endif
-        assert(tok < tok_end);
-        pack_mb_tokens(bc, &tok, tok_end);
+          if (mb_col + x_idx_sb >= c->mb_cols ||
+              mb_row + y_idx_sb >= c->mb_rows)
+            continue;
 
 #if CONFIG_SUPERBLOCKS
-        if (m->mbmi.encoded_as_sb) {
-          assert(!i);
-          mb_col += 2;
-          m += 2;
-          break;
-        }
+          vp9_write(bc, sb_m->mbmi.sb_type, c->sb32_coded);
+          if (sb_m->mbmi.sb_type) {
+            assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32);
+            write_modes_b(cpi, sb_m, bc, &tok, tok_end,
+                          mb_row + y_idx_sb, mb_col + x_idx_sb);
+          } else
 #endif
-        // Next MB
-        mb_row += dy;
-        mb_col += dx;
-        m += offset_extended;
+          {
+            // Process the 4 MBs in the order:
+            // top-left, top-right, bottom-left, bottom-right
+            for (i = 0; i < 4; i++) {
+              const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1);
+              MODE_INFO *mb_m = m + x_idx + y_idx * mis;
+
+              if (mb_row + y_idx >= c->mb_rows ||
+                  mb_col + x_idx >= c->mb_cols) {
+                // MB lies outside frame, move on
+                continue;
+              }
+
+#if CONFIG_SUPERBLOCKS
+              assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16);
+#endif
+              write_modes_b(cpi, mb_m, bc, &tok, tok_end,
+                            mb_row + y_idx, mb_col + x_idx);
+            }
+          }
+        }
       }
     }
-    mb_row += 2;
   }
 }
 
@@ -1800,13 +1732,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
   }
 
 #if CONFIG_SUPERBLOCKS
-  {
-    /* sb mode probability */
-    const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1));
-
-    pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max);
-    vp9_write_literal(&header_bc, pc->sb_coded, 8);
-  }
+#if CONFIG_SUPERBLOCKS64
+  pc->sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]);
+  vp9_write_literal(&header_bc, pc->sb64_coded, 8);
+#endif
+  pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
+  vp9_write_literal(&header_bc, pc->sb32_coded, 8);
 #endif
 
   {
@@ -2195,12 +2126,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
 
   if (pc->frame_type == KEY_FRAME) {
     decide_kf_ymode_entropy(cpi);
-    write_kfmodes(cpi, &residual_bc);
+    write_modes(cpi, &residual_bc);
   } else {
     /* This is not required if the counts in cpi are consistent with the
     * final packing pass */
     // if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount);
-    pack_inter_mode_mvs(cpi, &residual_bc);
+    write_modes(cpi, &residual_bc);
     vp9_update_mode_context(&cpi->common);
   }
 
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index f5cfbd1a1..e8f6f46b0 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -181,10 +181,13 @@ typedef struct macroblock {
 
   // Structure to hold context for each of the 4 MBs within a SB:
   // when encoded as 4 independent MBs:
-  PICK_MODE_CONTEXT mb_context[4];
+  PICK_MODE_CONTEXT mb_context[4][4];
 #if CONFIG_SUPERBLOCKS
   // when 4 MBs share coding parameters:
-  PICK_MODE_CONTEXT sb_context[4];
+  PICK_MODE_CONTEXT sb32_context[4];
+#if CONFIG_SUPERBLOCKS64
+  PICK_MODE_CONTEXT sb64_context;
+#endif  // CONFIG_SUPERBLOCKS64
 #endif
 
   void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 3219e12da..219295052 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -47,14 +47,17 @@ int enc_debug = 0;
 
 extern void select_interp_filter_type(VP9_COMP *cpi);
 
-static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
-                              TOKENEXTRA **t, int recon_yoffset,
-                              int recon_uvoffset, int output_enabled,
-                              int mb_col, int mb_row);
+static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
+                              int recon_yoffset, int recon_uvoffset,
+                              int output_enabled, int mb_row, int mb_col);
 
-static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
-                              TOKENEXTRA **t, int recon_yoffset,
-                              int recon_uvoffset, int mb_col, int mb_row);
+static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
+                                int recon_yoffset, int recon_uvoffset,
+                                int output_enabled, int mb_row, int mb_col);
+
+static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
+                                int recon_yoffset, int recon_uvoffset,
+                                int output_enabled, int mb_row, int mb_col);
 
 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
 
@@ -431,37 +434,45 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x,
 #endif
 
 static void update_state(VP9_COMP *cpi, MACROBLOCK *x,
-                         PICK_MODE_CONTEXT *ctx) {
-  int i;
+                         PICK_MODE_CONTEXT *ctx, int block_size,
+                         int output_enabled) {
+  int i, x_idx, y;
   MACROBLOCKD *xd = &x->e_mbd;
   MODE_INFO *mi = &ctx->mic;
-  MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
   int mb_mode = mi->mbmi.mode;
   int mb_mode_index = ctx->best_mode_index;
+  const int mis = cpi->common.mode_info_stride;
+#if CONFIG_SUPERBLOCKS
+  int mb_block_size = 1 << mi->mbmi.sb_type;
+#else
+  int mb_block_size = 1;
+#endif
 
 #if CONFIG_DEBUG
   assert(mb_mode < MB_MODE_COUNT);
   assert(mb_mode_index < MAX_MODES);
  assert(mi->mbmi.ref_frame < MAX_REF_FRAMES);
 #endif
+#if CONFIG_SUPERBLOCKS
+  assert(mi->mbmi.sb_type == (block_size >> 5));
+#endif
 
   // Restore the coding context of the MB to that that was in place
   // when the mode was picked for it
-  vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO));
-#if CONFIG_SUPERBLOCKS
-  if (mi->mbmi.encoded_as_sb) {
-    const int mis = cpi->common.mode_info_stride;
-    if (xd->mb_to_right_edge >= 0)
-      vpx_memcpy(xd->mode_info_context + 1, mi, sizeof(MODE_INFO));
-    if (xd->mb_to_bottom_edge >= 0) {
-      vpx_memcpy(xd->mode_info_context + mis, mi, sizeof(MODE_INFO));
-      if (xd->mb_to_right_edge >= 0)
-        vpx_memcpy(xd->mode_info_context + mis + 1, mi, sizeof(MODE_INFO));
+  for (y = 0; y < mb_block_size; y++) {
+    for (x_idx = 0; x_idx < mb_block_size; x_idx++) {
+      if ((xd->mb_to_right_edge >> 7) + mb_block_size > x_idx &&
+          (xd->mb_to_bottom_edge >> 7) + mb_block_size > y) {
+        MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis;
+
+        vpx_memcpy(mi_addr, mi, sizeof(MODE_INFO));
+      }
     }
+  }
 #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
-  } else {
+  if (block_size == 16) {
     ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
-#endif
   }
 #endif
 
@@ -482,6 +493,9 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
   }
 
+  if (!output_enabled)
+    return;
+
  {
     int segment_id = mbmi->segment_id;
     if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
@@ -603,6 +617,135 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x,
   }
 }
 
+static unsigned find_seg_id(uint8_t *buf, int block_size,
+                            int start_y, int height, int start_x, int width) {
+  const int end_x = MIN(start_x + block_size, width);
+  const int end_y = MIN(start_y + block_size, height);
+  int x, y;
+  unsigned seg_id = -1;
+
+  buf += width * start_y;
+  for (y = start_y; y < end_y; y++, buf += width) {
+    for (x = start_x; x < end_x; x++) {
+      seg_id = MIN(seg_id, buf[x]);
+    }
+  }
+
+  return seg_id;
+}
+
+static void set_offsets(VP9_COMP *cpi,
+                        int mb_row, int mb_col, int block_size,
+                        int *ref_yoffset, int *ref_uvoffset) {
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi;
+  const int dst_fb_idx = cm->new_fb_idx;
+  const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride;
+  const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride;
+  const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
+  const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
+  const int src_y_stride = x->src.y_stride;
+  const int src_uv_stride = x->src.uv_stride;
+  const int src_yoffset = 16 * mb_row * src_y_stride + 16 * mb_col;
+  const int src_uvoffset = 8 * mb_row * src_uv_stride + 8 * mb_col;
+  const int ref_fb_idx = cm->lst_fb_idx;
+  const int ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+  const int ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+  const int idx_map = mb_row * cm->mb_cols + mb_col;
+  const int idx_str = xd->mode_info_stride * mb_row + mb_col;
+
+  // entropy context structures
+  xd->above_context = cm->above_context + mb_col;
+  xd->left_context = cm->left_context + (mb_row & 3);
+
+  // GF active flags data structure
+  x->gf_active_ptr = (signed char *)&cpi->gf_active_flags[idx_map];
+
+  // Activity map pointer
+  x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
+  x->active_ptr = cpi->active_map + idx_map;
+
+  /* pointers to mode info contexts */
+  x->partition_info = x->pi + idx_str;
+  xd->mode_info_context = cm->mi + idx_str;
+  mbmi = &xd->mode_info_context->mbmi;
+  xd->prev_mode_info_context = cm->prev_mi + idx_str;
+
+  // Set up destination pointers
+  xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+  xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+  xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+  /* Set up limit values for MV components to prevent them from
+   * extending beyond the UMV borders assuming 16x16 block size */
+  x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+  x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
+  x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+                   (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND));
+  x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+                   (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND));
+
+  // Set up distance of MB to edge of frame in 1/8th pel units
+  block_size >>= 4;  // in macroblock units
+  assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1)));
+  xd->mb_to_top_edge = -((mb_row * 16) << 3);
+  xd->mb_to_left_edge = -((mb_col * 16) << 3);
+  xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
+  xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
+
+  // Are edges available for intra prediction?
+  xd->up_available = (mb_row != 0);
+  xd->left_available = (mb_col != 0);
+
+  /* Reference buffer offsets */
+  *ref_yoffset = (mb_row * ref_y_stride * 16) + (mb_col * 16);
+  *ref_uvoffset = (mb_row * ref_uv_stride * 8) + (mb_col * 8);
+
+  /* set up source buffers */
+  x->src.y_buffer = cpi->Source->y_buffer + src_yoffset;
+  x->src.u_buffer = cpi->Source->u_buffer + src_uvoffset;
+  x->src.v_buffer = cpi->Source->v_buffer + src_uvoffset;
+
+  /* R/D setup */
+  x->rddiv = cpi->RDDIV;
+  x->rdmult = cpi->RDMULT;
+
+  /* segment ID */
+  if (xd->segmentation_enabled) {
+    if (xd->update_mb_segmentation_map) {
+      mbmi->segment_id = find_seg_id(cpi->segmentation_map, block_size,
+                                     mb_row, cm->mb_rows, mb_col, cm->mb_cols);
+    } else {
+      mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, block_size,
+                                     mb_row, cm->mb_rows, mb_col, cm->mb_cols);
+    }
+    assert(mbmi->segment_id <= 3);
+    vp9_mb_init_quantizer(cpi, x);
+
+    if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
+        !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) &&
+        vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) &&
+        vp9_check_segref(xd, 1, INTRA_FRAME) +
+        vp9_check_segref(xd, 1, LAST_FRAME) +
+        vp9_check_segref(xd, 1, GOLDEN_FRAME) +
+        vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) {
+      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
+    } else {
+      const int y = mb_row & ~3;
+      const int x = mb_col & ~3;
+      const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
+      const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
+
+      cpi->seg0_progress =
+          ((y * cm->mb_cols + x * 4 + p32 + p16) << 16) / cm->MBs;
+    }
+  } else {
+    mbmi->segment_id = 0;
+  }
+}
+
 static void pick_mb_modes(VP9_COMP *cpi,
                           VP9_COMMON *cm,
                           int mb_row,
@@ -613,24 +756,15 @@ static void pick_mb_modes(VP9_COMP *cpi,
                           int *totalrate,
                           int *totaldist) {
   int i;
-  int map_index;
   int recon_yoffset, recon_uvoffset;
-  int ref_fb_idx = cm->lst_fb_idx;
-  int dst_fb_idx = cm->new_fb_idx;
-  int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
-  int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
   ENTROPY_CONTEXT_PLANES left_context[2];
   ENTROPY_CONTEXT_PLANES above_context[2];
   ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
                                                       + mb_col;
 
-  // Offsets to move pointers from MB to MB within a SB in raster order
-  int row_delta[4] = { 0, +1, 0, -1};
-  int col_delta[4] = { +1, -1, +1, +1};
-
   /* Function should not modify L & A contexts; save and restore on exit */
   vpx_memcpy(left_context,
-             cm->left_context,
+             cm->left_context + (mb_row & 2),
             sizeof(left_context));
   vpx_memcpy(above_context,
              initial_above_context_ptr,
@@ -638,113 +772,36 @@
 
   /* Encode MBs in raster order within the SB */
   for (i = 0; i < 4; i++) {
-    int dy = row_delta[i];
-    int dx = col_delta[i];
-    int offset_unextended = dy * cm->mb_cols + dx;
-    int offset_extended = dy * xd->mode_info_stride + dx;
-    MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+    const int x_idx = i & 1, y_idx = i >> 1;
+    MB_MODE_INFO *mbmi;
 
-    // TODO Many of the index items here can be computed more efficiently!
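As a sanity check on the edge arithmetic set_offsets() uses above, here is a worked example under assumed dimensions (a 45x80-MB frame and a 32x32 block at mb_row 4, mb_col 6 are made-up values):

#include <stdio.h>

int main(void) {
  const int mb_rows = 45, mb_cols = 80;  /* assumed frame size in MBs */
  const int block_size = 2;              /* 32x32 block, in MB units */
  const int mb_row = 4, mb_col = 6;

  /* All four distances come out in 1/8th-pel units, matching the MVs
   * they are compared against. */
  printf("top    %d\n", -((mb_row * 16) << 3));                        /* -512 */
  printf("left   %d\n", -((mb_col * 16) << 3));                        /* -768 */
  printf("bottom %d\n", ((mb_rows - block_size - mb_row) * 16) << 3);  /* 4992 */
  printf("right  %d\n", ((mb_cols - block_size - mb_col) * 16) << 3);  /* 9216 */
  return 0;
}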
-
-    if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) {
+    if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols) {
       // MB lies outside frame, move on
-      mb_row += dy;
-      mb_col += dx;
-
-      // Update pointers
-      x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
-      x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
-      x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
-
-      x->gf_active_ptr += offset_unextended;
-      x->partition_info += offset_extended;
-      xd->mode_info_context += offset_extended;
-      xd->prev_mode_info_context += offset_extended;
-#if CONFIG_DEBUG
-      assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
-             (xd->mode_info_context - cpi->common.mip));
-#endif
       continue;
     }
 
     // Index of the MB in the SB 0..3
     xd->mb_index = i;
-
-    map_index = (mb_row * cpi->common.mb_cols) + mb_col;
-    x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
-
-    // set above context pointer
-    xd->above_context = cm->above_context + mb_col;
-
-    // Restore the appropriate left context depending on which
-    // row in the SB the MB is situated
-    xd->left_context = cm->left_context + (i >> 1);
-
-    // Set up distance of MB to edge of frame in 1/8th pel units
-    xd->mb_to_top_edge = -((mb_row * 16) << 3);
-    xd->mb_to_left_edge = -((mb_col * 16) << 3);
-    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
-    xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
-
-    // Set up limit values for MV components to prevent them from
-    // extending beyond the UMV borders assuming 16x16 block size
-    x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
-    x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
-    x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
-                     (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
-    x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
-                     (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
-
-    xd->up_available = (mb_row != 0);
-    xd->left_available = (mb_col != 0);
-
-    recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
-    recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
-
-    xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-    xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-    xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+    set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
+                &recon_yoffset, &recon_uvoffset);
 
 #if !CONFIG_SUPERBLOCKS
     // Copy current MB to a work buffer
     vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
 #endif
 
-    x->rddiv = cpi->RDDIV;
-    x->rdmult = cpi->RDMULT;
-
     if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
       vp9_activity_masking(cpi, x);
 
-    // Is segmentation enabled
-    if (xd->segmentation_enabled) {
-      // Code to set segment id in xd->mbmi.segment_id
-      if (xd->update_mb_segmentation_map)
-        mbmi->segment_id = cpi->segmentation_map[map_index];
-      else
-        mbmi->segment_id = cm->last_frame_seg_map[map_index];
-      if (mbmi->segment_id > 3)
-        mbmi->segment_id = 0;
-
-      vp9_mb_init_quantizer(cpi, x);
-    } else
-      // Set to Segment 0 by default
-      mbmi->segment_id = 0;
-
-    x->active_ptr = cpi->active_map + map_index;
-
+    mbmi = &xd->mode_info_context->mbmi;
 #if CONFIG_SUPERBLOCKS
-    xd->mode_info_context->mbmi.encoded_as_sb = 0;
+    mbmi->sb_type = BLOCK_SIZE_MB16X16;
 #endif
 
     cpi->update_context = 0;    // TODO Do we need this now??
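The x_idx/y_idx expressions above are the raster mapping this patch uses at every level; a runnable check of both the 32-in-64 and the 16-in-32 variants:

#include <stdio.h>

/* Within a 64x64 block, j selects one of four 32x32 quadrants
 * (offsets in MB units: x = (j & 1) << 1, y = j & 2); within a 32x32
 * block, i selects one of four 16x16 MBs (x = i & 1, y = i >> 1).
 * Both walk top-left, top-right, bottom-left, bottom-right. */
int main(void) {
  int i, j;

  for (j = 0; j < 4; j++)
    printf("32x32 quadrant %d at (+%d, +%d) MBs\n", j, (j & 1) << 1, j & 2);
  for (i = 0; i < 4; i++)
    printf("16x16 MB %d at (+%d, +%d) MBs\n", i, i & 1, i >> 1);
  return 0;
}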
     vp9_intra_prediction_down_copy(xd);
-#ifdef ENC_DEBUG
-    enc_debug = (cpi->common.current_video_frame == 46 &&
-                 mb_row == 5 && mb_col == 2);
-#endif
 
     // Find best coding mode & reconstruct the MB so it is available
     // as a predictor for MBs that follow in the SB
     if (cm->frame_type == KEY_FRAME) {
@@ -758,28 +815,16 @@ static void pick_mb_modes(VP9_COMP *cpi,
       *totaldist += d;
 
       // Dummy encode, do not do the tokenization
-      encode_macroblock(cpi, x, tp,
-                        recon_yoffset, recon_uvoffset, 0, mb_col, mb_row);
+      encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0,
+                        mb_row + y_idx, mb_col + x_idx);
       // Note the encoder may have changed the segment_id
 
       // Save the coding context
-      vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context,
+      vpx_memcpy(&x->mb_context[xd->sb_index][i].mic, xd->mode_info_context,
                 sizeof(MODE_INFO));
     } else {
       int seg_id, r, d;
 
-      if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
-          !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) &&
-          vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) &&
-          vp9_check_segref(xd, 1, INTRA_FRAME) +
-          vp9_check_segref(xd, 1, LAST_FRAME) +
-          vp9_check_segref(xd, 1, GOLDEN_FRAME) +
-          vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) {
-        cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
-      } else {
-        cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs;
-      }
-
 #ifdef ENC_DEBUG
       if (enc_debug)
         printf("inter pick_mb_modes %d %d\n", mb_row, mb_col);
@@ -790,8 +835,8 @@
       *totaldist += d;
 
       // Dummy encode, do not do the tokenization
-      encode_macroblock(cpi, x, tp,
-                        recon_yoffset, recon_uvoffset, 0, mb_col, mb_row);
+      encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0,
+                        mb_row + y_idx, mb_col + x_idx);
 
       seg_id = mbmi->segment_id;
       if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) {
@@ -811,28 +856,10 @@
           cpi->ref_pred_count[pred_context][pred_flag]++;
       }
     }
-
-    // Next MB
-    mb_row += dy;
-    mb_col += dx;
-
-    x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
-    x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
-    x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
-
-    x->gf_active_ptr += offset_unextended;
-    x->partition_info += offset_extended;
-    xd->mode_info_context += offset_extended;
-    xd->prev_mode_info_context += offset_extended;
-
-#if CONFIG_DEBUG
-    assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
-           (xd->mode_info_context - cpi->common.mip));
-#endif
   }
 
   /* Restore L & A coding context to those in place on entry */
-  vpx_memcpy(cm->left_context,
+  vpx_memcpy(cm->left_context + (mb_row & 2),
              left_context,
             sizeof(left_context));
   vpx_memcpy(initial_above_context_ptr,
@@ -841,392 +868,204 @@
 }
 
 #if CONFIG_SUPERBLOCKS
-static void pick_sb_modes (VP9_COMP *cpi,
-                           VP9_COMMON *cm,
-                           int mb_row,
-                           int mb_col,
-                           MACROBLOCK *x,
-                           MACROBLOCKD *xd,
-                           TOKENEXTRA **tp,
-                           int *totalrate,
-                           int *totaldist)
-{
-  int map_index;
+static void pick_sb_modes(VP9_COMP *cpi,
+                          VP9_COMMON *cm,
+                          int mb_row,
+                          int mb_col,
+                          MACROBLOCK *x,
+                          MACROBLOCKD *xd,
+                          TOKENEXTRA **tp,
+                          int *totalrate,
+                          int *totaldist) {
   int recon_yoffset, recon_uvoffset;
-  int ref_fb_idx = cm->lst_fb_idx;
-  int dst_fb_idx = cm->new_fb_idx;
-  int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
-  int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
-  ENTROPY_CONTEXT_PLANES left_context[2];
-  ENTROPY_CONTEXT_PLANES above_context[2];
-  ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
-                                                      + mb_col;
 
-  /* Function should not modify L & A contexts; save and restore on exit */
-  vpx_memcpy (left_context,
-              cm->left_context,
-              sizeof(left_context));
-  vpx_memcpy (above_context,
-              initial_above_context_ptr,
-              sizeof(above_context));
-
-  map_index = (mb_row * cpi->common.mb_cols) + mb_col;
-  x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
-
-  /* set above context pointer */
-  xd->above_context = cm->above_context + mb_col;
-
-  /* Restore the appropriate left context depending on which
-   * row in the SB the MB is situated */
-  xd->left_context = cm->left_context;
-
-  // Set up distance of MB to edge of frame in 1/8th pel units
-  xd->mb_to_top_edge = -((mb_row * 16) << 3);
-  xd->mb_to_left_edge = -((mb_col * 16) << 3);
-  xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3;
-  xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3;
-
-  /* Set up limit values for MV components to prevent them from
-   * extending beyond the UMV borders assuming 16x16 block size */
-  x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
-  x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
-  x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
-                   (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
-  x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
-                   (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
-
-  xd->up_available = (mb_row != 0);
-  xd->left_available = (mb_col != 0);
-
-  recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
-  recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
-
-  xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-  xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-  xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
-#if 0 // FIXME
-  /* Copy current MB to a work buffer */
-  vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-#endif
-  x->rddiv = cpi->RDDIV;
-  x->rdmult = cpi->RDMULT;
-  if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+  set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
+  xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB32X32;
+  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
     vp9_activity_masking(cpi, x);
 
-  /* Is segmentation enabled */
-  if (xd->segmentation_enabled)
-  {
-    /* Code to set segment id in xd->mbmi.segment_id */
-    if (xd->update_mb_segmentation_map)
-      xd->mode_info_context->mbmi.segment_id =
-            cpi->segmentation_map[map_index] &&
-            cpi->segmentation_map[map_index + 1] &&
-            cpi->segmentation_map[map_index + cm->mb_cols] &&
-            cpi->segmentation_map[map_index + cm->mb_cols + 1];
-    else
-      xd->mode_info_context->mbmi.segment_id =
-            cm->last_frame_seg_map[map_index] &&
-            cm->last_frame_seg_map[map_index + 1] &&
-            cm->last_frame_seg_map[map_index + cm->mb_cols] &&
-            cm->last_frame_seg_map[map_index + cm->mb_cols + 1];
-    if (xd->mode_info_context->mbmi.segment_id > 3)
-      xd->mode_info_context->mbmi.segment_id = 0;
-
-    vp9_mb_init_quantizer(cpi, x);
-  }
-  else
-    /* Set to Segment 0 by default */
-    xd->mode_info_context->mbmi.segment_id = 0;
-
-  x->active_ptr = cpi->active_map + map_index;
-
  cpi->update_context = 0;    // TODO Do we need this now??
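The old &&-based segment-id merge removed above is what find_seg_id() (added earlier in vp9_encodeframe.c) replaces: the superblock takes the minimum id over the MBs it covers. A standalone copy of that logic with a small driver (the const qualifier on buf is an addition for the sketch):

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Minimum segment id over the block_size x block_size MB area,
 * clamped against the frame dimensions. */
static unsigned find_seg_id(const uint8_t *buf, int block_size,
                            int start_y, int height, int start_x, int width) {
  const int end_x = MIN(start_x + block_size, width);
  const int end_y = MIN(start_y + block_size, height);
  int x, y;
  unsigned seg_id = -1;  /* UINT_MAX, so any real id wins */

  buf += width * start_y;
  for (y = start_y; y < end_y; y++, buf += width) {
    for (x = start_x; x < end_x; x++)
      seg_id = MIN(seg_id, buf[x]);
  }
  return seg_id;
}

int main(void) {
  const uint8_t map[2 * 2] = { 3, 1, 2, 0 };  /* a tiny 2x2-MB frame */
  printf("%u\n", find_seg_id(map, 2, 0, 2, 0, 2));  /* prints 0 */
  return 0;
}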
   /* Find best coding mode & reconstruct the MB so it is available
    * as a predictor for MBs that follow in the SB */
-  if (cm->frame_type == KEY_FRAME)
-  {
-    vp9_rd_pick_intra_mode_sb(cpi, x,
-                              totalrate,
-                              totaldist);
+  if (cm->frame_type == KEY_FRAME) {
+    vp9_rd_pick_intra_mode_sb32(cpi, x,
+                                totalrate,
+                                totaldist);
 
     /* Save the coding context */
-    vpx_memcpy(&x->sb_context[0].mic, xd->mode_info_context,
+    vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context,
                sizeof(MODE_INFO));
   } else {
-    if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
-        !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) &&
-        vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) &&
-        vp9_check_segref(xd, 1, INTRA_FRAME) +
-        vp9_check_segref(xd, 1, LAST_FRAME) +
-        vp9_check_segref(xd, 1, GOLDEN_FRAME) +
-        vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) {
-      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
-    } else {
-      cpi->seg0_progress =
-        (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols) << 16) / cm->MBs;
+    vp9_rd_pick_inter_mode_sb32(cpi, x,
+                                recon_yoffset,
+                                recon_uvoffset,
+                                totalrate,
+                                totaldist);
+  }
+}
+
+#if CONFIG_SUPERBLOCKS64
+static void pick_sb64_modes(VP9_COMP *cpi,
+                            VP9_COMMON *cm,
+                            int mb_row,
+                            int mb_col,
+                            MACROBLOCK *x,
+                            MACROBLOCKD *xd,
+                            TOKENEXTRA **tp,
+                            int *totalrate,
+                            int *totaldist) {
+  int recon_yoffset, recon_uvoffset;
+
+  set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset);
+  xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
+  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+    vp9_activity_masking(cpi, x);
+  cpi->update_context = 0;  // TODO(rbultje) Do we need this now??
+
+  /* Find best coding mode & reconstruct the MB so it is available
+   * as a predictor for MBs that follow in the SB */
+  if (cm->frame_type == KEY_FRAME) {
+    vp9_rd_pick_intra_mode_sb64(cpi, x,
+                                totalrate,
+                                totaldist);
+
+    /* Save the coding context */
+    vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context,
+               sizeof(MODE_INFO));
+  } else {
+    vp9_rd_pick_inter_mode_sb64(cpi, x,
+                                recon_yoffset,
+                                recon_uvoffset,
+                                totalrate,
+                                totaldist);
+  }
+}
+#endif  // CONFIG_SUPERBLOCKS64
+#endif  // CONFIG_SUPERBLOCKS
+
+static void update_stats(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MODE_INFO *mi = xd->mode_info_context;
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+
+  if (cm->frame_type == KEY_FRAME) {
+#ifdef MODE_STATS
+    y_modes[mbmi->mode]++;
+#endif
+  } else {
+    int segment_id, seg_ref_active;
+
+    if (mbmi->ref_frame) {
+      int pred_context = vp9_get_pred_context(cm, xd, PRED_COMP);
+
+      if (mbmi->second_ref_frame <= INTRA_FRAME)
+        cpi->single_pred_count[pred_context]++;
+      else
+        cpi->comp_pred_count[pred_context]++;
     }
 
 #ifdef MODE_STATS
+    inter_y_modes[mbmi->mode]++;
 
-  /* Restore L & A coding context to those in place on entry */
-  vpx_memcpy (cm->left_context,
-              left_context,
-              sizeof(left_context));
-  vpx_memcpy (initial_above_context_ptr,
-              above_context,
-              sizeof(above_context));
-}
+    if (mbmi->mode == SPLITMV) {
+      int b;
+
+      for (b = 0; b < x->partition_info->count; b++) {
+        inter_b_modes[x->partition_info->bmi[b].mode]++;
+      }
+    }
 #endif
+    // If we have just a single reference frame coded for a segment then
+    // exclude from the reference frame counts used to work out
+    // probabilities. NOTE: At the moment we dont support custom trees
+    // for the reference frame coding for each segment but this is a
+    // possible future action.
+    segment_id = mbmi->segment_id;
+    seg_ref_active = vp9_segfeature_active(xd, segment_id,
+                                           SEG_LVL_REF_FRAME);
+    if (!seg_ref_active ||
+        ((vp9_check_segref(xd, segment_id, INTRA_FRAME) +
+          vp9_check_segref(xd, segment_id, LAST_FRAME) +
+          vp9_check_segref(xd, segment_id, GOLDEN_FRAME) +
+          vp9_check_segref(xd, segment_id, ALTREF_FRAME)) > 1)) {
+      cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++;
+    }
+    // Count of last ref frame 0,0 usage
+    if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
+      cpi->inter_zz_count++;
+  }
+}
+
 static void encode_sb(VP9_COMP *cpi,
                       VP9_COMMON *cm,
-                      int mbrow,
-                      int mbcol,
+                      int mb_row,
+                      int mb_col,
+                      int output_enabled,
                       MACROBLOCK *x,
                       MACROBLOCKD *xd,
-                      TOKENEXTRA **tp) {
-  int i;
-  int map_index;
-  int mb_row, mb_col;
+                      TOKENEXTRA **tp, int is_sb) {
   int recon_yoffset, recon_uvoffset;
-  int ref_fb_idx = cm->lst_fb_idx;
-  int dst_fb_idx = cm->new_fb_idx;
-  int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
-  int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
-  int row_delta[4] = { 0, +1,  0, -1};
-  int col_delta[4] = { +1, -1, +1, +1};
-
-  mb_row = mbrow;
-  mb_col = mbcol;
-
-  /* Encode MBs in raster order within the SB */
-  for (i = 0; i < 4; i++) {
-    int dy = row_delta[i];
-    int dx = col_delta[i];
-    int offset_extended   = dy * xd->mode_info_stride + dx;
-    int offset_unextended = dy * cm->mb_cols + dx;
-    MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
-
-    if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) {
-      // MB lies outside frame, move on
-      mb_row += dy;
-      mb_col += dx;
-
-      x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
-      x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
-      x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
-
-      x->gf_active_ptr      += offset_unextended;
-      x->partition_info     += offset_extended;
-      xd->mode_info_context += offset_extended;
-      xd->prev_mode_info_context += offset_extended;
-
-#if CONFIG_DEBUG
-      assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
-             (xd->mode_info_context - cpi->common.mip));
-#endif
-      continue;
-    }
-
-    xd->mb_index = i;
-
-    // Restore MB state to that when it was picked
-#if CONFIG_SUPERBLOCKS
-    if (xd->mode_info_context->mbmi.encoded_as_sb) {
-      update_state(cpi, x, &x->sb_context[i]);
-      cpi->sb_count++;
-    } else
-#endif
-      update_state(cpi, x, &x->mb_context[i]);
-
-    map_index = (mb_row * cpi->common.mb_cols) + mb_col;
-    x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
-
-    // reset above block coeffs
-    xd->above_context = cm->above_context + mb_col;
-    xd->left_context  = cm->left_context + (i >> 1);
-
-    // Set up distance of MB to edge of the frame in 1/8th pel units
-    // Set up limit values for MV components to prevent them from
-    // extending beyond the UMV borders assuming 32x32 block size
-    x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
-    x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
-
-    xd->mb_to_top_edge  = -((mb_row * 16) << 3);
-    xd->mb_to_left_edge = -((mb_col * 16) << 3);
 #if CONFIG_SUPERBLOCKS
-    if (xd->mode_info_context->mbmi.encoded_as_sb) {
-      x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
-                       (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
-      x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
-                       (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND));
+  cpi->sb32_count[is_sb]++;
+  if (is_sb) {
+    set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
+    update_state(cpi, x, &x->sb32_context[xd->sb_index], 32, output_enabled);
-      xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3;
-      xd->mb_to_right_edge  = ((cm->mb_cols - 2 - mb_col) * 16) << 3;
-    } else {
-#endif
-      x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
-                       (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
-      x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
-                       (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND));
-
-      xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
-      xd->mb_to_right_edge  = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
-#if CONFIG_SUPERBLOCKS
-    }
-#endif
-
-    xd->up_available   = (mb_row != 0);
-    xd->left_available = (mb_col != 0);
-
-    recon_yoffset  = (mb_row * recon_y_stride * 16) + (mb_col * 16);
-    recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
-
-    xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-    xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-    xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
-
-#if !CONFIG_SUPERBLOCKS
-    // Copy current MB to a work buffer
-    vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-#endif
-
-    if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-      vp9_activity_masking(cpi, x);
-
-    // Is segmentation enabled
-    if (xd->segmentation_enabled) {
-      vp9_mb_init_quantizer(cpi, x);
-    }
-
-    x->active_ptr = cpi->active_map + map_index;
-
-    cpi->update_context = 0;
-
-#if CONFIG_SUPERBLOCKS
-    if (!xd->mode_info_context->mbmi.encoded_as_sb)
-#endif
-      vp9_intra_prediction_down_copy(xd);
-
-    if (cm->frame_type == KEY_FRAME) {
-#if CONFIG_SUPERBLOCKS
-      if (xd->mode_info_context->mbmi.encoded_as_sb)
-        encode_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset,
-                          mb_col, mb_row);
-      else
-#endif
-        encode_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1,
-                          mb_col, mb_row);
-      // Note the encoder may have changed the segment_id
-
-#ifdef MODE_STATS
-      y_modes[mbmi->mode]++;
-#endif
-    } else {
-      unsigned char *segment_id;
-      int seg_ref_active;
-
-      if (xd->mode_info_context->mbmi.ref_frame) {
-        unsigned char pred_context;
-
-        pred_context = vp9_get_pred_context(cm, xd, PRED_COMP);
-
-        if (xd->mode_info_context->mbmi.second_ref_frame <= INTRA_FRAME)
-          cpi->single_pred_count[pred_context]++;
-        else
-          cpi->comp_pred_count[pred_context]++;
-      }
-
-#if CONFIG_SUPERBLOCKS
-      if (xd->mode_info_context->mbmi.encoded_as_sb)
-        encode_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset,
-                          mb_col, mb_row);
-      else
-#endif
-        encode_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1,
-                          mb_col, mb_row);
-      // Note the encoder may have changed the segment_id
-
-#ifdef MODE_STATS
-      inter_y_modes[mbmi->mode]++;
-
-      if (mbmi->mode == SPLITMV) {
-        int b;
-
-        for (b = 0; b < x->partition_info->count; b++) {
-          inter_b_modes[x->partition_info->bmi[b].mode]++;
-        }
-      }
-
-#endif
-
-      // If we have just a single reference frame coded for a segment then
-      // exclude from the reference frame counts used to work out
-      // probabilities. NOTE: At the moment we dont support custom trees
-      // for the reference frame coding for each segment but this is a
-      // possible future action.
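The control flow that replaces the removed walk above is easier to see in isolation: encode_sb64() either codes one 64x64 block or recurses into four 32x32 children, and encode_sb() in turn codes one 32x32 block or four 16x16 macroblocks. A hedged sketch of that two-level walk (the visit callback and types are hypothetical stand-ins for the encode_superblock64/encode_superblock32/encode_macroblock calls):

    /* Sketch of the 64->32->16 partition walk implied by the new code:
     * is_sb[0] == 2 codes the whole 64x64, is_sb[i] == 1 codes 32x32
     * child i whole, anything else falls back to four 16x16 MBs. */
    typedef void (*visit_fn)(int mb_row, int mb_col, int size_in_mbs);

    static void walk_sb64(int mb_row, int mb_col, int mb_rows, int mb_cols,
                          const int is_sb[4], visit_fn visit) {
      int i, j;

      if (is_sb[0] == 2) {
        visit(mb_row, mb_col, 4);          /* one 64x64 block */
        return;
      }
      for (i = 0; i < 4; i++) {
        const int r = mb_row + (i >> 1) * 2, c = mb_col + (i & 1) * 2;

        if (r >= mb_rows || c >= mb_cols)
          continue;                        /* child lies outside the frame */
        if (is_sb[i]) {
          visit(r, c, 2);                  /* one 32x32 block */
        } else {
          for (j = 0; j < 4; j++) {        /* four 16x16 macroblocks */
            const int rr = r + (j >> 1), cc = c + (j & 1);
            if (rr < mb_rows && cc < mb_cols)
              visit(rr, cc, 1);
          }
        }
      }
    }

Partial blocks at the right and bottom frame edges simply skip the out-of-frame children, which is the same edge handling the hunks below apply.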
-      segment_id = &mbmi->segment_id;
-      seg_ref_active = vp9_segfeature_active(xd, *segment_id,
-                                             SEG_LVL_REF_FRAME);
-      if (!seg_ref_active ||
-          ((vp9_check_segref(xd, *segment_id, INTRA_FRAME) +
-            vp9_check_segref(xd, *segment_id, LAST_FRAME) +
-            vp9_check_segref(xd, *segment_id, GOLDEN_FRAME) +
-            vp9_check_segref(xd, *segment_id, ALTREF_FRAME)) > 1)) {
-        {
-          cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++;
-        }
-      }
-
-      // Count of last ref frame 0,0 usage
-      if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
-        cpi->inter_zz_count++;
-    }
-
-#if CONFIG_SUPERBLOCKS
-    if (xd->mode_info_context->mbmi.encoded_as_sb) {
-      x->src.y_buffer += 32;
-      x->src.u_buffer += 16;
-      x->src.v_buffer += 16;
-
-      x->gf_active_ptr      += 2;
-      x->partition_info     += 2;
-      xd->mode_info_context += 2;
-      xd->prev_mode_info_context += 2;
+    encode_superblock32(cpi, tp, recon_yoffset, recon_uvoffset,
+                        output_enabled, mb_row, mb_col);
+    if (output_enabled)
+      update_stats(cpi);
 
+    if (output_enabled) {
       (*tp)->Token = EOSB_TOKEN;
       (*tp)++;
-      if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp;
-      break;
+      if (mb_row < cm->mb_rows)
+        cpi->tplist[mb_row].stop = *tp;
     }
   } else
+#endif
+  {
+    int i;
+
+    for (i = 0; i < 4; i++) {
+      const int x_idx = i & 1, y_idx = i >> 1;
+
+      if ((mb_row + y_idx >= cm->mb_rows) || (mb_col + x_idx >= cm->mb_cols)) {
+        // MB lies outside frame, move on
+        continue;
+      }
+
+      set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
+                  &recon_yoffset, &recon_uvoffset);
+      xd->mb_index = i;
+      update_state(cpi, x, &x->mb_context[xd->sb_index][i], 16, output_enabled);
+
+#if !CONFIG_SUPERBLOCKS
+      // Copy current MB to a work buffer
+      vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
 #endif
-    // Next MB
-    mb_row += dy;
-    mb_col += dx;
+      if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+        vp9_activity_masking(cpi, x);
 
-    x->src.y_buffer += 16 * (dx + dy * x->src.y_stride);
-    x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride);
-    x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride);
+      vp9_intra_prediction_down_copy(xd);
 
-    x->gf_active_ptr      += offset_unextended;
-    x->partition_info     += offset_extended;
-    xd->mode_info_context += offset_extended;
-    xd->prev_mode_info_context += offset_extended;
+      encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset,
+                        output_enabled, mb_row + y_idx, mb_col + x_idx);
+      if (output_enabled)
+        update_stats(cpi);
 
-#if CONFIG_DEBUG
-    assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
-           (xd->mode_info_context - cpi->common.mip));
-#endif
-    (*tp)->Token = EOSB_TOKEN;
-    (*tp)++;
-    if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp;
+      if (output_enabled) {
+        (*tp)->Token = EOSB_TOKEN;
+        (*tp)++;
+        if (mb_row + y_idx < cm->mb_rows)
+          cpi->tplist[mb_row + y_idx].stop = *tp;
+      }
+    }
   }
 
   // debug output
@@ -1240,14 +1079,54 @@ static void encode_sb(VP9_COMP *cpi,
 #endif
 }
 
-static
-void encode_sb_row(VP9_COMP *cpi,
-                   VP9_COMMON *cm,
-                   int mb_row,
-                   MACROBLOCK *x,
-                   MACROBLOCKD *xd,
-                   TOKENEXTRA **tp,
-                   int *totalrate) {
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+static void encode_sb64(VP9_COMP *cpi,
+                        VP9_COMMON *cm,
+                        int mb_row,
+                        int mb_col,
+                        MACROBLOCK *x,
+                        MACROBLOCKD *xd,
+                        TOKENEXTRA **tp, int is_sb[4]) {
+  cpi->sb64_count[is_sb[0] == 2]++;
+  if (is_sb[0] == 2) {
+    int recon_yoffset, recon_uvoffset;
+
+    set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset);
+    update_state(cpi, x, &x->sb64_context, 64, 1);
+    encode_superblock64(cpi, tp, recon_yoffset, recon_uvoffset,
+                        1, mb_row, mb_col);
+    update_stats(cpi);
+
+    (*tp)->Token = EOSB_TOKEN;
+    (*tp)++;
+    if (mb_row < cm->mb_rows)
+      cpi->tplist[mb_row].stop = *tp;
+  } else {
+    int i;
+
+    for (i = 0; i < 4; i++) {
+      const int x_idx = i & 1, y_idx = i >> 1;
+
+      if (mb_row + y_idx * 2 >= cm->mb_rows ||
+          mb_col + x_idx * 2 >= cm->mb_cols) {
+        // MB lies outside frame, move on
+        continue;
+      }
+      xd->sb_index = i;
+      encode_sb(cpi, cm, mb_row + 2 * y_idx, mb_col + 2 * x_idx, 1, x, xd, tp,
+                is_sb[i]);
+    }
+  }
+}
+#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+
+static void encode_sb_row(VP9_COMP *cpi,
+                          VP9_COMMON *cm,
+                          int mb_row,
+                          MACROBLOCK *x,
+                          MACROBLOCKD *xd,
+                          TOKENEXTRA **tp,
+                          int *totalrate) {
   int mb_col;
   int mb_cols = cm->mb_cols;
 
@@ -1255,105 +1134,103 @@ void encode_sb_row(VP9_COMP *cpi,
   vpx_memset(cm->left_context, 0, sizeof(cm->left_context));
 
   // Code each SB in the row
-  for (mb_col = 0; mb_col < mb_cols; mb_col += 2) {
-    int mb_rate = 0, mb_dist = 0;
+  for (mb_col = 0; mb_col < mb_cols; mb_col += 4) {
+    int i;
+    int sb32_rate = 0, sb32_dist = 0;
+    int is_sb[4];
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+    int sb64_rate = INT_MAX, sb64_dist;
+    ENTROPY_CONTEXT_PLANES l[4], a[4];
+    TOKENEXTRA *tp_orig = *tp;
+
+    memcpy(&a, cm->above_context + mb_col, sizeof(a));
+    memcpy(&l, cm->left_context, sizeof(l));
+#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+    for (i = 0; i < 4; i++) {
+      const int x_idx = (i & 1) << 1, y_idx = i & 2;
+      int mb_rate = 0, mb_dist = 0;
 #if CONFIG_SUPERBLOCKS
-    int sb_rate = INT_MAX, sb_dist;
+      int sb_rate = INT_MAX, sb_dist;
 #endif
-#if CONFIG_DEBUG
-    MODE_INFO *mic = xd->mode_info_context;
-    PARTITION_INFO *pi = x->partition_info;
-    signed char  *gfa = x->gf_active_ptr;
-    uint8_t *yb = x->src.y_buffer;
-    uint8_t *ub = x->src.u_buffer;
-    uint8_t *vb = x->src.v_buffer;
+      if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols)
+        continue;
+
+      xd->sb_index = i;
+
+      pick_mb_modes(cpi, cm, mb_row + y_idx, mb_col + x_idx,
+                    x, xd, tp, &mb_rate, &mb_dist);
 #endif
 
 #if CONFIG_SUPERBLOCKS
-    // Pick modes assuming the SB is coded as 4 independent MBs
-    xd->mode_info_context->mbmi.encoded_as_sb = 0;
+      if (!((( mb_cols & 1) && mb_col + x_idx ==  mb_cols - 1) ||
+            ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) {
+        /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
+        pick_sb_modes(cpi, cm, mb_row + y_idx, mb_col + x_idx,
+                      x, xd, tp, &sb_rate, &sb_dist);
+        sb_rate += vp9_cost_bit(cm->sb32_coded, 1);
+      }
+
+      /* Decide whether to encode as a SB or 4xMBs */
+      if (sb_rate < INT_MAX &&
+          RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) <
+          RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) {
+        is_sb[i] = 1;
+        sb32_rate += sb_rate;
+        sb32_dist += sb_dist;
+      } else
 #endif
-    pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate, &mb_dist);
+      {
 #if CONFIG_SUPERBLOCKS
-    mb_rate += vp9_cost_bit(cm->sb_coded, 0);
+        is_sb[i] = 0;
 #endif
+        sb32_rate += mb_rate;
+        sb32_dist += mb_dist;
+      }
 
-    x->src.y_buffer -= 32;
-    x->src.u_buffer -= 16;
-    x->src.v_buffer -= 16;
+      /* Encode SB using best computed mode(s) */
+      // FIXME(rbultje): there really shouldn't be any need to encode_mb/sb
+      // for each level that we go up, we can just keep tokens and recon
+      // pixels of the lower level; also, inverting SB/MB order (big->small
+      // instead of small->big) means we can use as threshold for small, which
+      // may enable breakouts if RD is not good enough (i.e. faster)
+      encode_sb(cpi, cm, mb_row + y_idx, mb_col + x_idx,
+                !(CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64),
+                x, xd, tp, is_sb[i]);
+    }
 
-    x->gf_active_ptr -= 2;
-    x->partition_info -= 2;
-    xd->mode_info_context -= 2;
-    xd->prev_mode_info_context -= 2;
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+    memcpy(cm->above_context + mb_col, &a, sizeof(a));
+    memcpy(cm->left_context, &l, sizeof(l));
+    sb32_rate += vp9_cost_bit(cm->sb64_coded, 0);
 
-#if CONFIG_DEBUG
-    assert(x->gf_active_ptr == gfa);
-    assert(x->partition_info == pi);
-    assert(xd->mode_info_context == mic);
-    assert(x->src.y_buffer == yb);
-    assert(x->src.u_buffer == ub);
-    assert(x->src.v_buffer == vb);
-#endif
-
-#if CONFIG_SUPERBLOCKS
-    if (!((( mb_cols & 1) && mb_col ==  mb_cols - 1) ||
-          ((cm->mb_rows & 1) && mb_row == cm->mb_rows - 1))) {
-      /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
-      xd->mode_info_context->mbmi.encoded_as_sb = 1;
-      pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &sb_rate, &sb_dist);
-      sb_rate += vp9_cost_bit(cm->sb_coded, 1);
+    if (!((( mb_cols & 3) && mb_col + 3 >=  mb_cols) ||
+          ((cm->mb_rows & 3) && mb_row + 3 >= cm->mb_rows))) {
+      pick_sb64_modes(cpi, cm, mb_row, mb_col,
+                      x, xd, tp, &sb64_rate, &sb64_dist);
+      sb64_rate += vp9_cost_bit(cm->sb64_coded, 1);
     }
 
     /* Decide whether to encode as a SB or 4xMBs */
-    if (sb_rate < INT_MAX &&
-        RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) <
-        RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) {
-      xd->mode_info_context->mbmi.encoded_as_sb = 1;
-      xd->mode_info_context[1].mbmi.encoded_as_sb = 1;
-      xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 1;
-      xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 1;
-      *totalrate += sb_rate;
+    if (sb64_rate < INT_MAX &&
+        RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist) <
+        RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) {
+      is_sb[0] = 2;
+      *totalrate += sb64_rate;
     } else
 #endif
     {
-#if CONFIG_SUPERBLOCKS
-      xd->mode_info_context->mbmi.encoded_as_sb = 0;
-      if (cm->mb_cols - 1 > mb_col)
-        xd->mode_info_context[1].mbmi.encoded_as_sb = 0;
-      if (cm->mb_rows - 1 > mb_row) {
-        xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 0;
-        if (cm->mb_cols - 1 > mb_col)
-          xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 0;
-      }
-#endif
-      *totalrate += mb_rate;
+      *totalrate += sb32_rate;
     }
 
-    /* Encode SB using best computed mode(s) */
-    encode_sb(cpi, cm, mb_row, mb_col, x, xd, tp);
-
-#if CONFIG_DEBUG
-    assert(x->gf_active_ptr == gfa + 2);
-    assert(x->partition_info == pi + 2);
-    assert(xd->mode_info_context == mic + 2);
-    assert(x->src.y_buffer == yb + 32);
-    assert(x->src.u_buffer == ub + 16);
-    assert(x->src.v_buffer == vb + 16);
-#endif
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+    assert(tp_orig == *tp);
+    encode_sb64(cpi, cm, mb_row, mb_col, x, xd, tp, is_sb);
+    assert(tp_orig < *tp);
+#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
   }
-
-  // this is to account for the border
-  x->gf_active_ptr += mb_cols - (mb_cols & 0x1);
-  x->partition_info += xd->mode_info_stride + 1 - (mb_cols & 0x1);
-  xd->mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
-  xd->prev_mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1);
-
-#if CONFIG_DEBUG
-  assert((xd->prev_mode_info_context - cpi->common.prev_mip) ==
-         (xd->mode_info_context - cpi->common.mip));
-#endif
 }
 
 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
@@ -1361,22 +1238,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
 
-  // GF active flags data structure
-  x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
-
-  // Activity map pointer
-  x->mb_activity_ptr = cpi->mb_activity_map;
-
   x->act_zbin_adj = 0;
   cpi->seg0_idx = 0;
   vpx_memset(cpi->ref_pred_count, 0, sizeof(cpi->ref_pred_count));
 
-  x->partition_info = x->pi;
-
-  xd->mode_info_context = cm->mi;
   xd->mode_info_stride = cm->mode_info_stride;
-  xd->prev_mode_info_context = cm->prev_mi;
-
   xd->frame_type = cm->frame_type;
 
   xd->frames_since_golden = cm->frames_since_golden;
@@ -1387,7 +1253,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   vp9_init_mbmode_probs(cm);
 
   // Copy data over into macro block data structures.
-  x->src = * cpi->Source;
+  x->src = *cpi->Source;
   xd->pre = cm->yv12_fb[cm->lst_fb_idx];
   xd->dst = cm->yv12_fb[cm->new_fb_idx];
 
@@ -1413,8 +1279,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   vp9_zero(cpi->common.fc.mv_ref_ct)
 #if CONFIG_SUPERBLOCKS
   vp9_zero(cpi->sb_ymode_count)
-  cpi->sb_count = 0;
-#endif
+  vp9_zero(cpi->sb32_count);
+#if CONFIG_SUPERBLOCKS64
+  vp9_zero(cpi->sb64_count);
+#endif  // CONFIG_SUPERBLOCKS64
+#endif  // CONFIG_SUPERBLOCKS
 #if CONFIG_COMP_INTERINTRA_PRED
   vp9_zero(cpi->interintra_count);
   vp9_zero(cpi->interintra_select_count);
@@ -1527,15 +1396,8 @@ static void encode_frame_internal(VP9_COMP *cpi) {
 
     {
       // For each row of SBs in the frame
-      for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) {
-        int offset = (cm->mb_cols + 1) & ~0x1;
-
+      for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) {
         encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate);
-
-        // adjust to the next row of SBs
-        x->src.y_buffer += 32 * x->src.y_stride - 16 * offset;
-        x->src.u_buffer += 16 * x->src.uv_stride - 8 * offset;
-        x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset;
       }
 
       cpi->tok_count = (unsigned int)(tp - cpi->tok);
@@ -1580,78 +1442,150 @@ static int check_dual_ref_flags(VP9_COMP *cpi) {
   }
 }
 
-static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
-  VP9_COMMON *cm = &cpi->common;
-  int mb_row, mb_col, mis = cm->mode_info_stride, segment_id;
-  MODE_INFO *mi, *mi_ptr = cm->mi;
-#if CONFIG_SUPERBLOCKS
-  int skip;
-  MODE_INFO *sb_mi_ptr = cm->mi, *sb_mi;
-  MB_MODE_INFO *sb_mbmi;
-#endif
-  MB_MODE_INFO *mbmi;
-  MACROBLOCK *x = &cpi->mb;
-  MACROBLOCKD *xd = &x->e_mbd;
+static void reset_skip_txfm_size_mb(VP9_COMP *cpi,
+                                    MODE_INFO *mi, TX_SIZE txfm_max) {
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+
+  if (mbmi->txfm_size > txfm_max) {
+    VP9_COMMON *const cm = &cpi->common;
+    MACROBLOCK *const x = &cpi->mb;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    const int segment_id = mbmi->segment_id;
+
+    xd->mode_info_context = mi;
+    assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+            vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+           (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff));
+    mbmi->txfm_size = txfm_max;
+  }
+}
 
-  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++, mi_ptr += mis) {
-    mi = mi_ptr;
 #if CONFIG_SUPERBLOCKS
-    sb_mi = sb_mi_ptr;
-#endif
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++, mi++) {
-      mbmi = &mi->mbmi;
-#if CONFIG_SUPERBLOCKS
-      sb_mbmi = &sb_mi->mbmi;
-#endif
-      if (mbmi->txfm_size > txfm_max) {
-#if CONFIG_SUPERBLOCKS
-        if (sb_mbmi->encoded_as_sb) {
-          if (!((mb_col & 1) || (mb_row & 1))) {
-            segment_id = mbmi->segment_id;
-            skip = mbmi->mb_skip_coeff;
-            if (mb_col < cm->mb_cols - 1) {
-              segment_id = segment_id && mi[1].mbmi.segment_id;
-              skip = skip && mi[1].mbmi.mb_skip_coeff;
-            }
-            if (mb_row < cm->mb_rows - 1) {
-              segment_id = segment_id &&
-                           mi[cm->mode_info_stride].mbmi.segment_id;
-              skip = skip && mi[cm->mode_info_stride].mbmi.mb_skip_coeff;
-              if (mb_col < cm->mb_cols - 1) {
-                segment_id = segment_id &&
-                             mi[cm->mode_info_stride + 1].mbmi.segment_id;
-                skip = skip && mi[cm->mode_info_stride + 1].mbmi.mb_skip_coeff;
-              }
-            }
-            xd->mode_info_context = mi;
-            assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
-                    vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
-                   (cm->mb_no_coeff_skip && skip));
-            mbmi->txfm_size = txfm_max;
-          } else {
-            mbmi->txfm_size = sb_mbmi->txfm_size;
-          }
-        } else {
-#endif
-          segment_id = mbmi->segment_id;
-          xd->mode_info_context = mi;
-          assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
-                  vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
                 (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff));
-          mbmi->txfm_size = txfm_max;
-#if CONFIG_SUPERBLOCKS
-        }
-#endif
-      }
-#if CONFIG_SUPERBLOCKS
-      if (mb_col & 1)
-        sb_mi += 2;
-#endif
+static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) {
+  int x, y;
+
+  for (y = 0; y < ymbs; y++) {
+    for (x = 0; x < xmbs; x++) {
+      if (!mi[y * mis + x].mbmi.mb_skip_coeff)
+        return 0;
     }
-#if CONFIG_SUPERBLOCKS
-    if (mb_row & 1)
-      sb_mi_ptr += 2 * mis;
+  }
+
+  return 1;
+}
+
+static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs,
+                          TX_SIZE txfm_size) {
+  int x, y;
+
+  for (y = 0; y < ymbs; y++) {
+    for (x = 0; x < xmbs; x++) {
+      mi[y * mis + x].mbmi.txfm_size = txfm_size;
+    }
+  }
+}
+
+static void reset_skip_txfm_size_sb32(VP9_COMP *cpi, MODE_INFO *mi,
+                                      int mis, TX_SIZE txfm_max,
+                                      int mb_rows_left, int mb_cols_left) {
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+
+  if (mbmi->txfm_size > txfm_max) {
+    VP9_COMMON *const cm = &cpi->common;
+    MACROBLOCK *const x = &cpi->mb;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    const int segment_id = mbmi->segment_id;
+    const int ymbs = MIN(2, mb_rows_left);
+    const int xmbs = MIN(2, mb_cols_left);
+
+    xd->mode_info_context = mi;
+    assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+            vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+           (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
+    set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
+  }
+}
+
+#if CONFIG_SUPERBLOCKS64
+static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi,
+                                      int mis, TX_SIZE txfm_max,
+                                      int mb_rows_left, int mb_cols_left) {
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+
+  if (mbmi->txfm_size > txfm_max) {
+    VP9_COMMON *const cm = &cpi->common;
+    MACROBLOCK *const x = &cpi->mb;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    const int segment_id = mbmi->segment_id;
+    const int ymbs = MIN(4, mb_rows_left);
+    const int xmbs = MIN(4, mb_cols_left);
+
+    xd->mode_info_context = mi;
+    assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+            vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+           (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
+    set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
+  }
+}
 #endif
+#endif
+
+static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
+  VP9_COMMON *const cm = &cpi->common;
+  int mb_row, mb_col;
+  const int mis = cm->mode_info_stride;
+  MODE_INFO *mi, *mi_ptr = cm->mi;
+
+  for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
+    mi = mi_ptr;
+    for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+      if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+        reset_skip_txfm_size_sb64(cpi, mi, mis, txfm_max,
+                                  cm->mb_rows - mb_row, cm->mb_cols - mb_col);
      } else
+#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+      {
+        int i;
+
+        for (i = 0; i < 4; i++) {
+          const int x_idx_sb = (i & 1) << 1, y_idx_sb = i & 2;
+#if CONFIG_SUPERBLOCKS
+          MODE_INFO *sb_mi = mi + y_idx_sb * mis + x_idx_sb;
+#endif
+
+          if (mb_row + y_idx_sb >= cm->mb_rows ||
+              mb_col + x_idx_sb >= cm->mb_cols)
+            continue;
+
+#if CONFIG_SUPERBLOCKS
+          if (sb_mi->mbmi.sb_type) {
+            reset_skip_txfm_size_sb32(cpi, sb_mi, mis, txfm_max,
+                                      cm->mb_rows - mb_row - y_idx_sb,
+                                      cm->mb_cols - mb_col - x_idx_sb);
+          } else
+#endif
+          {
+            int m;
+
+            for (m = 0; m < 4; m++) {
+              const int x_idx = x_idx_sb + (m & 1), y_idx = y_idx_sb + (m >> 1);
+              MODE_INFO *mb_mi;
+
+              if (mb_col + x_idx >= cm->mb_cols ||
+                  mb_row + y_idx >= cm->mb_rows)
+                continue;
+
+              mb_mi = mi + y_idx * mis + x_idx;
+#if CONFIG_SUPERBLOCKS
+              assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
+#endif
+              reset_skip_txfm_size_mb(cpi, mb_mi, txfm_max);
+            }
+          }
+        }
+      }
    }
  }
 }
@@ -1961,7 +1895,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
 #endif
 
 #if CONFIG_SUPERBLOCKS
-  if (xd->mode_info_context->mbmi.encoded_as_sb) {
+  if (xd->mode_info_context->mbmi.sb_type) {
     ++cpi->sb_ymode_count[m];
   } else
 #endif
@@ -2014,9 +1948,9 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi,
                                        ENTROPY_CONTEXT_PLANES tl[4],
                                        TOKENEXTRA *t[4],
                                        TOKENEXTRA **tp,
-                                       int skip[4])
+                                       int skip[4], int output_enabled)
 {
-  TOKENEXTRA tokens[4][16 * 24];
+  TOKENEXTRA tokens[4][16 * 25];
   int n_tokens[4], n;
 
   // if there were no skips, we don't need to do anything
@@ -2056,7 +1990,7 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi,
     if (skip[n]) {
       x->e_mbd.above_context = &ta[n];
       x->e_mbd.left_context  = &tl[n];
-      vp9_stuff_mb(cpi, &x->e_mbd, tp, 0);
+      vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled);
     } else {
       if (n_tokens[n]) {
        memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
@@ -2065,22 +1999,135 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi,
     }
   }
 }
+
+#if CONFIG_SUPERBLOCKS64
+static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
+                                         MACROBLOCK *x,
+                                         ENTROPY_CONTEXT_PLANES ta[16],
+                                         ENTROPY_CONTEXT_PLANES tl[16],
+                                         TOKENEXTRA *t[16],
+                                         TOKENEXTRA **tp,
+                                         int skip[16], int output_enabled) {
+  if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) {
+    TOKENEXTRA tokens[4][1024+512];
+    int n_tokens[4], n;
+
+    // if there were no skips, we don't need to do anything
+    if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
+      return;
+
+    // if we don't do coeff skipping for this frame, we don't
+    // need to do anything here
+    if (!cpi->common.mb_no_coeff_skip)
+      return;
+
+    // if all 4 MBs skipped coeff coding, nothing to be done
+    if (skip[0] && skip[1] && skip[2] && skip[3])
+      return;
+
+    // so the situation now is that we want to skip coeffs
+    // for some MBs, but not all, and we didn't code EOB
+    // coefficients for them. However, the skip flag for this
+    // SB will be 0 overall, so we need to insert EOBs in the
+    // middle of the token tree. Do so here.
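The loop that follows carries out exactly that EOB insertion, and the mechanism is worth seeing in isolation: each sub-block's token run was recorded via the t[n] start pointers, and the writer now either replays a saved run or synthesizes end-of-block tokens for sub-blocks whose skip flag was set late. A simplified sketch under assumed types (TOK and stuff_eobs are hypothetical stand-ins for TOKENEXTRA and the vp9_stuff_mb/vp9_stuff_sb calls):

    #include <string.h>

    typedef struct { int token; } TOK;  /* hypothetical token record */

    /* Replay saved per-block token runs, replacing the runs of skipped
     * sub-blocks with stuffed end-of-block tokens, as the loops below do
     * for 4 (TX_32X32) or 16 (smaller transforms) sub-blocks. */
    static TOK *splice_tokens(TOK *out, TOK *const saved[],
                              const int n_tokens[], const int skip[],
                              int n_blocks,
                              TOK *(*stuff_eobs)(TOK *out, int block)) {
      int n;
      for (n = 0; n < n_blocks; n++) {
        if (skip[n]) {
          out = stuff_eobs(out, n);            /* synthesize EOBs */
        } else if (n_tokens[n] > 0) {
          memcpy(out, saved[n], n_tokens[n] * sizeof(*out));
          out += n_tokens[n];                  /* replay recorded run */
        }
      }
      return out;
    }

The copy-out step has to happen before the output pointer is rewound to t[0], since the stuffed EOB tokens for an early sub-block would otherwise overwrite the still-unread tokens of a later one.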
+    for (n = 0; n < 4; n++) {
+      if (n < 3) {
+        n_tokens[n] = t[n + 1] - t[n];
+      } else {
+        n_tokens[n] = *tp - t[3];
+      }
+      if (n_tokens[n]) {
+        memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0]));
+      }
+    }
+
+    // reset pointer, stuff EOBs where necessary
+    *tp = t[0];
+    for (n = 0; n < 4; n++) {
+      if (skip[n]) {
+        x->e_mbd.above_context = &ta[n * 2];
+        x->e_mbd.left_context  = &tl[n * 2];
+        vp9_stuff_sb(cpi, &x->e_mbd, tp, !output_enabled);
+      } else {
+        if (n_tokens[n]) {
+          memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
+        }
+        (*tp) += n_tokens[n];
+      }
+    }
+  } else {
+    TOKENEXTRA tokens[16][16 * 25];
+    int n_tokens[16], n;
+
+    // if there were no skips, we don't need to do anything
+    if (!skip[ 0] && !skip[ 1] && !skip[ 2] && !skip[ 3] &&
+        !skip[ 4] && !skip[ 5] && !skip[ 6] && !skip[ 7] &&
+        !skip[ 8] && !skip[ 9] && !skip[10] && !skip[11] &&
+        !skip[12] && !skip[13] && !skip[14] && !skip[15])
+      return;
+
+    // if we don't do coeff skipping for this frame, we don't
+    // need to do anything here
+    if (!cpi->common.mb_no_coeff_skip)
+      return;
+
+    // if all 4 MBs skipped coeff coding, nothing to be done
+    if (skip[ 0] && skip[ 1] && skip[ 2] && skip[ 3] &&
+        skip[ 4] && skip[ 5] && skip[ 6] && skip[ 7] &&
+        skip[ 8] && skip[ 9] && skip[10] && skip[11] &&
+        skip[12] && skip[13] && skip[14] && skip[15])
+      return;
+
+    // so the situation now is that we want to skip coeffs
+    // for some MBs, but not all, and we didn't code EOB
+    // coefficients for them. However, the skip flag for this
+    // SB will be 0 overall, so we need to insert EOBs in the
+    // middle of the token tree. Do so here.
+    for (n = 0; n < 16; n++) {
+      if (n < 15) {
+        n_tokens[n] = t[n + 1] - t[n];
+      } else {
+        n_tokens[n] = *tp - t[15];
+      }
+      if (n_tokens[n]) {
+        memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0]));
+      }
+    }
+
+    // reset pointer, stuff EOBs where necessary
+    *tp = t[0];
+    for (n = 0; n < 16; n++) {
+      if (skip[n]) {
+        x->e_mbd.above_context = &ta[n];
+        x->e_mbd.left_context  = &tl[n];
+        vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled);
+      } else {
+        if (n_tokens[n]) {
+          memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
+        }
+        (*tp) += n_tokens[n];
+      }
+    }
+  }
+}
+#endif  // CONFIG_SUPERBLOCKS64
 #endif /* CONFIG_SUPERBLOCKS */
 
-static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
-                              TOKENEXTRA **t, int recon_yoffset,
-                              int recon_uvoffset, int output_enabled,
-                              int mb_col, int mb_row) {
-  VP9_COMMON *cm = &cpi->common;
+static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
+                              int recon_yoffset, int recon_uvoffset,
+                              int output_enabled,
+                              int mb_row, int mb_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
   unsigned char *segment_id = &mbmi->segment_id;
   int seg_ref_active;
   unsigned char ref_pred_flag;
 
   x->skip = 0;
 #if CONFIG_SUPERBLOCKS
-  assert(!xd->mode_info_context->mbmi.encoded_as_sb);
+  assert(!xd->mode_info_context->mbmi.sb_type);
 #endif
 
 #ifdef ENC_DEBUG
@@ -2332,10 +2379,11 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 #if CONFIG_SUPERBLOCKS
-static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
-                              TOKENEXTRA **t, int recon_yoffset,
-                              int recon_uvoffset, int mb_col, int mb_row) {
+static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
+                                int recon_yoffset, int recon_uvoffset,
+                                int output_enabled, int mb_row, int mb_col) {
   VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *src = x->src.y_buffer;
   uint8_t *dst = xd->dst.y_buffer;
@@ -2403,7 +2451,8 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
     vp9_build_intra_predictors_sby_s(&x->e_mbd);
     vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
-    sum_intra_stats(cpi, x);
+    if (output_enabled)
+      sum_intra_stats(cpi, x);
   } else {
     int ref_fb_idx;
 
@@ -2461,7 +2510,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
     vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst);
 
     if (!x->skip) {
-      vp9_tokenize_sb(cpi, &x->e_mbd, t, 0);
+      vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled);
     } else {
       int mb_skip_context =
         cpi->common.mb_no_coeff_skip ?
@@ -2470,11 +2519,13 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
         0;
       mi->mbmi.mb_skip_coeff = 1;
       if (cm->mb_no_coeff_skip) {
-        cpi->skip_true_count[mb_skip_context]++;
+        if (output_enabled)
+          cpi->skip_true_count[mb_skip_context]++;
        vp9_fix_contexts_sb(xd);
      } else {
-        vp9_stuff_sb(cpi, xd, t, 0);
-        cpi->skip_false_count[mb_skip_context]++;
+        vp9_stuff_sb(cpi, xd, t, !output_enabled);
+        if (output_enabled)
+          cpi->skip_false_count[mb_skip_context]++;
       }
     }
 
@@ -2493,7 +2544,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
     for (n = 0; n < 4; n++) {
       int x_idx = n & 1, y_idx = n >> 1;
 
-      xd->left_context = cm->left_context + y_idx;
+      xd->left_context = cm->left_context + y_idx + (mb_row & 2);
       xd->above_context = cm->above_context + mb_col + x_idx;
       memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
       memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
@@ -2520,7 +2571,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
                         vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
 
       if (!x->skip) {
-        vp9_tokenize_mb(cpi, &x->e_mbd, t, 0);
+        vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled);
         skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
       } else {
         int mb_skip_context =
@@ -2531,42 +2582,327 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x,
         xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
         if (cpi->common.mb_no_coeff_skip) {
           // TODO(rbultje) this should be done per-sb instead of per-mb?
-          cpi->skip_true_count[mb_skip_context]++;
+          if (output_enabled)
+            cpi->skip_true_count[mb_skip_context]++;
           vp9_reset_mb_tokens_context(xd);
         } else {
-          vp9_stuff_mb(cpi, xd, t, 0);
+          vp9_stuff_mb(cpi, xd, t, !output_enabled);
           // TODO(rbultje) this should be done per-sb instead of per-mb?
-          cpi->skip_false_count[mb_skip_context]++;
+          if (output_enabled)
+            cpi->skip_false_count[mb_skip_context]++;
         }
       }
     }
 
     xd->mode_info_context = mi;
-    update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip);
+    update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip, output_enabled);
 #if CONFIG_TX32X32
   }
 #endif
 
-  if (cm->txfm_mode == TX_MODE_SELECT &&
-      !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
-        (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
-         vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
-    cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
+  if (output_enabled) {
+    if (cm->txfm_mode == TX_MODE_SELECT &&
+        !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
+          (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+           vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+      cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
+    } else {
+      TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
+#if CONFIG_TX32X32
+                   TX_32X32 :
+#else
+                   TX_16X16 :
+#endif
+                   cm->txfm_mode;
+      mi->mbmi.txfm_size = sz;
+      if (mb_col < cm->mb_cols - 1)
+        mi[1].mbmi.txfm_size = sz;
+      if (mb_row < cm->mb_rows - 1) {
+        mi[mis].mbmi.txfm_size = sz;
+        if (mb_col < cm->mb_cols - 1)
+          mi[mis + 1].mbmi.txfm_size = sz;
+      }
+    }
+  }
+}
+
+#if CONFIG_SUPERBLOCKS64
+static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
+                                int recon_yoffset, int recon_uvoffset,
+                                int output_enabled, int mb_row, int mb_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const uint8_t *src = x->src.y_buffer;
+  uint8_t *dst = xd->dst.y_buffer;
+  const uint8_t *usrc = x->src.u_buffer;
+  uint8_t *udst = xd->dst.u_buffer;
+  const uint8_t *vsrc = x->src.v_buffer;
+  uint8_t *vdst = xd->dst.v_buffer;
+  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+  int seg_ref_active;
+  unsigned char ref_pred_flag;
+  int n;
+  TOKENEXTRA *tp[16];
+  int skip[16];
+  MODE_INFO *mi = x->e_mbd.mode_info_context;
+  unsigned int segment_id = mi->mbmi.segment_id;
+  ENTROPY_CONTEXT_PLANES ta[16], tl[16];
+  const int mis = cm->mode_info_stride;
+
+  x->skip = 0;
+
+  if (cm->frame_type == KEY_FRAME) {
+    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+      adjust_act_zbin(cpi, x);
+      vp9_update_zbin_extra(cpi, x);
+    }
   } else {
-    TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
+    vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, cm);
+
+    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+      // Adjust the zbin based on this MB rate.
+      adjust_act_zbin(cpi, x);
+    }
+
+    // Experimental code. Special case for gf and arf zeromv modes.
+    // Increase zbin size to suppress noise
+    cpi->zbin_mode_boost = 0;
+    if (cpi->zbin_mode_boost_enabled) {
+      if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
+        if (xd->mode_info_context->mbmi.mode == ZEROMV) {
+          if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
+            cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+          else
+            cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+        } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
+          cpi->zbin_mode_boost = 0;
+        } else {
+          cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+        }
+      }
+    }
+
+    vp9_update_zbin_extra(cpi, x);
+
+    seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+
+    // SET VARIOUS PREDICTION FLAGS
+
+    // Did the chosen reference frame match its predicted value.
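The flag computed just below rests on a small idea worth stating separately: rather than always coding the reference frame outright, the encoder codes whether the context-predicted reference was correct. A toy model, with hypothetical names and types rather than the codec's real signalling:

    /* Toy model of reference-frame prediction: signal one hit/miss bit
     * ("the predicted reference was right"); only on a miss does the
     * actual reference need to be coded. ref_pred_flag below plays the
     * role of that bit. */
    typedef enum { REF_INTRA, REF_LAST, REF_GOLDEN, REF_ALTREF } ref_frame_t;

    static int ref_prediction_hit(ref_frame_t actual, ref_frame_t predicted) {
      return actual == predicted;  /* 1 = prediction correct */
    }

When the prediction is usually right, the hit/miss bit is heavily skewed and therefore cheap, which is what makes the scheme pay for itself.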
+    ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame ==
+                      vp9_get_pred_ref(cm, xd)));
+    vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag);
+  }
+
+  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+    vp9_build_intra_predictors_sb64y_s(&x->e_mbd);
+    vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);
+    if (output_enabled)
+      sum_intra_stats(cpi, x);
+  } else {
+    int ref_fb_idx;
+
+    assert(cm->frame_type != KEY_FRAME);
+
+    if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+      ref_fb_idx = cpi->common.lst_fb_idx;
+    else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+      ref_fb_idx = cpi->common.gld_fb_idx;
+    else
+      ref_fb_idx = cpi->common.alt_fb_idx;
+
+    xd->pre.y_buffer =
+        cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+    xd->pre.u_buffer =
+        cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+    xd->pre.v_buffer =
+        cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+    if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+      int second_ref_fb_idx;
+
+      if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+        second_ref_fb_idx = cpi->common.lst_fb_idx;
+      else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
+        second_ref_fb_idx = cpi->common.gld_fb_idx;
+      else
+        second_ref_fb_idx = cpi->common.alt_fb_idx;
+
+      xd->second_pre.y_buffer =
+          cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
+      xd->second_pre.u_buffer =
+          cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
+      xd->second_pre.v_buffer =
+          cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+    }
+
+    vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
+                                       xd->dst.u_buffer, xd->dst.v_buffer,
+                                       xd->dst.y_stride, xd->dst.uv_stride);
+  }
+
+#if CONFIG_TX32X32
+  if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
+    int n;
+
+    for (n = 0; n < 4; n++) {
+      int x_idx = n & 1, y_idx = n >> 1;
+
+      xd->mode_info_context = mi + x_idx * 2 + mis * y_idx * 2;
+      xd->left_context = cm->left_context + (y_idx << 1);
+      xd->above_context = cm->above_context + mb_col + (x_idx << 1);
+      memcpy(&ta[n * 2], xd->above_context, sizeof(*ta) * 2);
+      memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2);
+      tp[n] = *t;
+      xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2;
+      vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
+                           src + x_idx * 32 + y_idx * 32 * src_y_stride,
+                           src_y_stride,
+                           dst + x_idx * 32 + y_idx * 32 * dst_y_stride,
+                           dst_y_stride);
+      vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+                            usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
+                            vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
+                            src_uv_stride,
+                            udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
+                            vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
+                            dst_uv_stride);
+      vp9_transform_sby_32x32(x);
+      vp9_transform_sbuv_16x16(x);
+      vp9_quantize_sby_32x32(x);
+      vp9_quantize_sbuv_16x16(x);
+      // TODO(rbultje): trellis optimize
+      vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
+      vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
+      vp9_recon_sby_s_c(&x->e_mbd,
+                        dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
+                        dst_y_stride);
+      vp9_recon_sbuv_s_c(&x->e_mbd,
+                         udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
+                         vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride);
+
+      if (!x->skip) {
+        vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled);
+      } else {
+        int mb_skip_context = cpi->common.mb_no_coeff_skip ?
+            (mi - 1)->mbmi.mb_skip_coeff +
+            (mi - mis)->mbmi.mb_skip_coeff : 0;
+        xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+        if (cm->mb_no_coeff_skip) {
+          if (output_enabled)
+            cpi->skip_true_count[mb_skip_context]++;
+          vp9_fix_contexts_sb(xd);
+        } else {
+          vp9_stuff_sb(cpi, xd, t, !output_enabled);
+          if (output_enabled)
+            cpi->skip_false_count[mb_skip_context]++;
+        }
+      }
+
+      // copy skip flag on all mb_mode_info contexts in this SB
+      // if this was a skip at this txfm size
+      if (mb_col + x_idx * 2 < cm->mb_cols - 1)
+        mi[mis * y_idx * 2 + x_idx * 2 + 1].mbmi.mb_skip_coeff =
+            mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
+      if (mb_row + y_idx * 2 < cm->mb_rows - 1) {
+        mi[mis * y_idx * 2 + x_idx * 2 + mis].mbmi.mb_skip_coeff =
+            mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
+        if (mb_col + x_idx * 2 < cm->mb_cols - 1)
+          mi[mis * y_idx * 2 + x_idx * 2 + mis + 1].mbmi.mb_skip_coeff =
+              mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
+      }
+      skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+    }
+  } else
+#endif
+  {
+    for (n = 0; n < 16; n++) {
+      const int x_idx = n & 3, y_idx = n >> 2;
+
+      xd->left_context = cm->left_context + y_idx;
+      xd->above_context = cm->above_context + mb_col + x_idx;
+      memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
+      memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
+      tp[n] = *t;
+      xd->mode_info_context = mi + x_idx + y_idx * mis;
+
+      vp9_subtract_mby_s_c(x->src_diff,
+                           src + x_idx * 16 + y_idx * 16 * src_y_stride,
+                           src_y_stride,
+                           dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+                           dst_y_stride);
+      vp9_subtract_mbuv_s_c(x->src_diff,
+                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            src_uv_stride,
+                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            dst_uv_stride);
+      vp9_fidct_mb(x);
+      vp9_recon_mby_s_c(&x->e_mbd,
+                        dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
+      vp9_recon_mbuv_s_c(&x->e_mbd,
+                         udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                         vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
+
+      if (!x->skip) {
+        vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled);
+        skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+      } else {
+        int mb_skip_context = cpi->common.mb_no_coeff_skip ?
+            (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
+            (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0;
+        xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
+        if (cpi->common.mb_no_coeff_skip) {
+          // TODO(rbultje) this should be done per-sb instead of per-mb?
+          if (output_enabled)
+            cpi->skip_true_count[mb_skip_context]++;
+          vp9_reset_mb_tokens_context(xd);
+        } else {
+          vp9_stuff_mb(cpi, xd, t, !output_enabled);
+          // TODO(rbultje) this should be done per-sb instead of per-mb?
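The mb_skip_context used throughout this branch, and in the counters just below, is derived from the two causal neighbors. A minimal sketch of that derivation, with MODE_INFO reduced to the single field involved and the frame-border handling simplified to explicit guards:

    /* context = left neighbor's skip flag + above neighbor's skip flag,
     * yielding 0..2, as in (mi - 1)->mbmi.mb_skip_coeff +
     * (mi - mis)->mbmi.mb_skip_coeff in the lines above. */
    struct mi_stub { int mb_skip_coeff; };

    static int skip_context(const struct mi_stub *mi, int mi_stride,
                            int have_left, int have_above) {
      int ctx = 0;
      if (have_left)
        ctx += mi[-1].mb_skip_coeff;          /* left MODE_INFO */
      if (have_above)
        ctx += mi[-mi_stride].mb_skip_coeff;  /* above MODE_INFO */
      return ctx;
    }

Skip flags cluster spatially, so conditioning the skip probability (and the skip_true/skip_false counters) on this 0..2 context is what makes the flag cheap to code in smooth regions.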
+          if (output_enabled)
+            cpi->skip_false_count[mb_skip_context]++;
+        }
+      }
+    }
+  }
+
+  xd->mode_info_context = mi;
+  update_sb64_skip_coeff_state(cpi, x, ta, tl, tp, t, skip, output_enabled);
+
+  if (output_enabled) {
+    if (cm->txfm_mode == TX_MODE_SELECT &&
+        !((cm->mb_no_coeff_skip &&
+           ((mi->mbmi.txfm_size == TX_32X32 &&
+             skip[0] && skip[1] && skip[2] && skip[3]) ||
+            (mi->mbmi.txfm_size != TX_32X32 &&
+             skip[0] && skip[1] && skip[2] && skip[3] &&
+             skip[4] && skip[5] && skip[6] && skip[7] &&
+             skip[8] && skip[9] && skip[10] && skip[11] &&
+             skip[12] && skip[13] && skip[14] && skip[15]))) ||
+          (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
+           vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+      cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
+    } else {
+      int x, y;
+      TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
 #if CONFIG_TX32X32
                    TX_32X32 :
 #else
                    TX_16X16 :
 #endif
                    cm->txfm_mode;
-      mi->mbmi.txfm_size = sz;
-      if (mb_col < cm->mb_cols - 1)
-        mi[1].mbmi.txfm_size = sz;
-      if (mb_row < cm->mb_rows - 1) {
-        mi[mis].mbmi.txfm_size = sz;
-        if (mb_col < cm->mb_cols - 1)
-          mi[mis + 1].mbmi.txfm_size = sz;
+      for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+          if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) {
+            mi[mis * y + x].mbmi.txfm_size = sz;
+          }
+        }
+      }
     }
   }
 }
+#endif  // CONFIG_SUPERBLOCKS64
 #endif
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 6bce1adbf..38a2eab62 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -17,6 +17,7 @@
 #include
 #include
 #include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_common.h"
 
 #ifdef ENTROPY_STATS
 static int mv_ref_ct [31] [4] [2];
@@ -241,9 +242,6 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
   },                                                    \
   v = INT_MAX;)
 
-#define MIN(x,y) (((x)<(y))?(x):(y))
-#define MAX(x,y) (((x)>(y))?(x):(y))
-
 int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                              int_mv *bestmv, int_mv *ref_mv,
                                              int error_per_bit,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 9b186c2c4..44f20adbe 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -556,43 +556,19 @@ static void print_seg_map(VP9_COMP *cpi) {
 }
 
 static void update_reference_segmentation_map(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
-  int row, col, sb_rows = (cm->mb_rows + 1) >> 1, sb_cols = (cm->mb_cols + 1) >> 1;
-  MODE_INFO *mi = cm->mi;
-  uint8_t *segmap = cpi->segmentation_map;
-  uint8_t *segcache = cm->last_frame_seg_map;
+  VP9_COMMON *const cm = &cpi->common;
+  int row, col;
+  MODE_INFO *mi, *mi_ptr = cm->mi;
+  uint8_t *cache_ptr = cm->last_frame_seg_map, *cache;
 
-  for (row = 0; row < sb_rows; row++) {
-    for (col = 0; col < sb_cols; col++) {
-      MODE_INFO *miptr = mi + col * 2;
-      uint8_t *cache = segcache + col * 2;
-#if CONFIG_SUPERBLOCKS
-      if (miptr->mbmi.encoded_as_sb) {
-        cache[0] = miptr->mbmi.segment_id;
-        if (!(cm->mb_cols & 1) || col < sb_cols - 1)
-          cache[1] = miptr->mbmi.segment_id;
-        if (!(cm->mb_rows & 1) || row < sb_rows - 1) {
-          cache[cm->mb_cols] = miptr->mbmi.segment_id;
-          if (!(cm->mb_cols & 1) || col < sb_cols - 1)
-            cache[cm->mb_cols + 1] = miptr->mbmi.segment_id;
-        }
-      } else
-#endif
-      {
-        cache[0] = miptr[0].mbmi.segment_id;
-        if (!(cm->mb_cols & 1) || col < sb_cols - 1)
-          cache[1] = miptr[1].mbmi.segment_id;
-        if (!(cm->mb_rows & 1) || row < sb_rows - 1) {
-          cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id;
-          if (!(cm->mb_cols & 1) || col < sb_cols - 1)
-            cache[1] = miptr[1].mbmi.segment_id;
-          cache[cm->mb_cols + 1] = miptr[cm->mode_info_stride + 1].mbmi.segment_id;
-        }
-      }
+  for (row = 0; row < cm->mb_rows; row++) {
+    mi = mi_ptr;
+    cache = cache_ptr;
+    for (col = 0; col < cm->mb_cols; col++, mi++, cache++) {
+      cache[0] = mi->mbmi.segment_id;
     }
-    segmap += 2 * cm->mb_cols;
-    segcache += 2 * cm->mb_cols;
-    mi += 2 * cm->mode_info_stride;
+    mi_ptr += cm->mode_info_stride;
+    cache_ptr += cm->mb_cols;
   }
 }
 
@@ -1788,7 +1764,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   cm->prob_gf_coded = 128;
   cm->prob_intra_coded = 63;
 #if CONFIG_SUPERBLOCKS
-  cm->sb_coded = 200;
+  cm->sb32_coded = 200;
+#if CONFIG_SUPERBLOCKS64
+  cm->sb64_coded = 200;
+#endif
 #endif
   for (i = 0; i < COMP_PRED_CONTEXTS; i++)
     cm->prob_comppred[i] = 128;
@@ -1994,6 +1973,13 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
       vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v,
       vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
       vp9_sad32x32x4d)
+
+#if CONFIG_SUPERBLOCKS64
+  BFP(BLOCK_64X64, vp9_sad64x64, vp9_variance64x64, vp9_sub_pixel_variance64x64,
+      vp9_variance_halfpixvar64x64_h, vp9_variance_halfpixvar64x64_v,
+      vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
+      vp9_sad64x64x4d)
+#endif
 #endif
 
   BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16,
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 7c9181ba7..11428352f 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -390,8 +390,15 @@ enum BlockSize {
   BLOCK_4X4 = PARTITIONING_4X4,
   BLOCK_16X16,
   BLOCK_MAX_SEGMENTS,
+#if CONFIG_SUPERBLOCKS
   BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
+#if CONFIG_SUPERBLOCKS64
+  BLOCK_64X64,
+#endif  // CONFIG_SUPERBLOCKS64
   BLOCK_MAX_SB_SEGMENTS,
+#else  // CONFIG_SUPERBLOCKS
+  BLOCK_MAX_SB_SEGMENTS = BLOCK_MAX_SEGMENTS,
+#endif  // CONFIG_SUPERBLOCKS
 };
 
 typedef struct VP9_COMP {
@@ -571,7 +578,10 @@ typedef struct VP9_COMP {
   int cq_target_quality;
 
 #if CONFIG_SUPERBLOCKS
-  int sb_count;
+  int sb32_count[2];
+#if CONFIG_SUPERBLOCKS64
+  int sb64_count[2];
+#endif
   int sb_ymode_count [VP9_I32X32_MODES];
 #endif
   int ymode_count[VP9_YMODES];  /* intra MB type cts this frame */
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 774b577a0..a79cb5aad 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -41,6 +41,7 @@
 #include "vp9/common/vp9_entropy.h"
 #include "vp9_rtcd.h"
 #include "vp9/common/vp9_mvref_common.h"
+#include "vp9/common/vp9_common.h"
 
 #define MAXF(a,b)    (((a) > (b)) ? (a) : (b))
@@ -926,14 +927,21 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
 
 #if CONFIG_SUPERBLOCKS
 #if CONFIG_TX32X32
-static int rdcost_sby_32x32(MACROBLOCK *x) {
+static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
   MACROBLOCKD * const xd = &x->e_mbd;
   ENTROPY_CONTEXT_PLANES t_above, t_left;
-  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above,
-                  *tl = (ENTROPY_CONTEXT *) &t_left;
+  ENTROPY_CONTEXT *ta, *tl;
 
-  vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-  vpx_memcpy(&t_left,  xd->left_context,  sizeof(ENTROPY_CONTEXT_PLANES));
+  if (backup) {
+    ta = (ENTROPY_CONTEXT *) &t_above,
+    tl = (ENTROPY_CONTEXT *) &t_left;
+
+    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left,  xd->left_context,  sizeof(ENTROPY_CONTEXT_PLANES));
+  } else {
+    ta = (ENTROPY_CONTEXT *) xd->above_context;
+    tl = (ENTROPY_CONTEXT *) xd->left_context;
+  }
 
   return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
 }
@@ -953,7 +961,8 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
 
 #define DEBUG_ERROR 0
 static void super_block_yrd_32x32(MACROBLOCK *x,
-                                  int *rate, int *distortion, int *skippable) {
+                                  int *rate, int *distortion, int *skippable,
+                                  int backup) {
   SUPERBLOCK  * const x_sb = &x->sb_coeff_data;
   MACROBLOCKD * const xd = &x->e_mbd;
   SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
@@ -976,7 +985,7 @@ static void super_block_yrd_32x32(MACROBLOCK *x,
   printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
          vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
 #endif
-  *rate = rdcost_sby_32x32(x);
+  *rate = rdcost_sby_32x32(x, backup);
   *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd);
 }
 #endif
@@ -1005,7 +1014,7 @@ static void super_block_yrd(VP9_COMP *cpi,
 #if CONFIG_TX32X32
   vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                        dst, dst_y_stride);
-  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
+  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
 #endif
 
 #if DEBUG_ERROR
@@ -1065,6 +1074,104 @@ static void super_block_yrd(VP9_COMP *cpi,
   xd->above_context = orig_above;
   xd->left_context = orig_left;
 }
+
+static void super_block_64_yrd(VP9_COMP *cpi,
+                               MACROBLOCK *x, int *rate, int *distortion,
+                               int *skip,
+                               int64_t txfm_cache[NB_TXFM_MODES]) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
+  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
+  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
+                         *orig_above = xd->above_context;
+  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
+                         *orig_left = xd->left_context;
+
+  for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
+    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
+    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
+    r[n][0] = 0;
+    d[n] = 0;
+    s[n] = 1;
+  }
+
+#if CONFIG_TX32X32
+  for (n = 0; n < 4; n++) {
+    int x_idx = n & 1, y_idx = n >> 1;
+    int r_tmp, d_tmp, s_tmp;
+
+    xd->above_context = &t_above[TX_32X32][x_idx << 1];
+    xd->left_context = &t_left[TX_32X32][y_idx << 1];
+    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
+                         src + 32 * x_idx + 32 * y_idx * src_y_stride,
+                         src_y_stride,
+                         dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
+                         dst_y_stride);
+    super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
+    r[TX_32X32][0] += r_tmp;
+    d[TX_32X32] += d_tmp;
+    s[TX_32X32] = s[TX_32X32] && s_tmp;
+  }
+#endif
+
+#if DEBUG_ERROR
+  int err[3] = { 0, 0, 0 };
+#endif
+  for (n = 0; n < 16; n++) {
+    int x_idx = n & 3, y_idx = n >> 2;
+    int r_tmp, d_tmp, s_tmp;
+
+    vp9_subtract_mby_s_c(x->src_diff,
+                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
+                         src_y_stride,
+                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+                         dst_y_stride);
+
+    xd->above_context = &t_above[TX_16X16][x_idx];
+    xd->left_context = &t_left[TX_16X16][y_idx];
+    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
+    d[TX_16X16] += d_tmp;
+    r[TX_16X16][0] += r_tmp;
+    s[TX_16X16] = s[TX_16X16] && s_tmp;
+#if DEBUG_ERROR
+    vp9_inverse_transform_mby_16x16(xd);
+    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
+#endif
+
+    xd->above_context = &t_above[TX_4X4][x_idx];
+    xd->left_context = &t_left[TX_4X4][y_idx];
+    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
+    d[TX_4X4] += d_tmp;
+    r[TX_4X4][0] += r_tmp;
+    s[TX_4X4] = s[TX_4X4] && s_tmp;
+#if DEBUG_ERROR
+    vp9_inverse_transform_mby_4x4(xd);
+    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
+#endif
+
+    xd->above_context = &t_above[TX_8X8][x_idx];
+    xd->left_context = &t_left[TX_8X8][y_idx];
+    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
+    d[TX_8X8] += d_tmp;
+    r[TX_8X8][0] += r_tmp;
+    s[TX_8X8] = s[TX_8X8] && s_tmp;
+#if DEBUG_ERROR
+    vp9_inverse_transform_mby_8x8(xd);
+    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
+#endif
+  }
+#if DEBUG_ERROR
+  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
+  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
+  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
+#endif
+  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
+                           TX_SIZE_MAX_SB - 1);
+
+  xd->above_context = orig_above;
+  xd->left_context = orig_left;
+}
 #endif
 
 static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
@@ -1359,6 +1466,48 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi,
 
   return best_rd;
 }
+
+#if CONFIG_SUPERBLOCKS64
+static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi,
+                                        MACROBLOCK *x,
+                                        int *rate,
+                                        int *rate_tokenonly,
+                                        int *distortion,
+                                        int *skippable,
+                                        int64_t txfm_cache[NB_TXFM_MODES]) {
+  MB_PREDICTION_MODE mode;
+  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+  int this_rate, this_rate_tokenonly;
+  int this_distortion, s;
+  int64_t best_rd = INT64_MAX, this_rd;
+
+  /* Y Search for 32x32 intra prediction mode */
+  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+    x->e_mbd.mode_info_context->mbmi.mode = mode;
+    vp9_build_intra_predictors_sb64y_s(&x->e_mbd);
+
+    super_block_64_yrd(cpi, x, &this_rate_tokenonly,
+                       &this_distortion, &s, txfm_cache);
+    this_rate = this_rate_tokenonly +
+                x->mbmode_cost[x->e_mbd.frame_type]
+                              [x->e_mbd.mode_info_context->mbmi.mode];
+    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+    if (this_rd < best_rd) {
+      mode_selected = mode;
+      best_rd = this_rd;
+      *rate = this_rate;
+      *rate_tokenonly = this_rate_tokenonly;
+      *distortion = this_distortion;
+      *skippable = s;
+    }
+  }
+
+  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
+
+  return best_rd;
+}
+#endif  // CONFIG_SUPERBLOCKS64
 #endif
 
 static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi,
@@ -1735,18 +1884,23 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 
 #if CONFIG_SUPERBLOCKS
 #if CONFIG_TX32X32
-static int rd_cost_sbuv_16x16(MACROBLOCK *x) {
+static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) {
   int b;
   int cost = 0;
   MACROBLOCKD *const xd = &x->e_mbd;
   ENTROPY_CONTEXT_PLANES t_above, t_left;
   ENTROPY_CONTEXT *ta, *tl;
 
-  vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
-  vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+  if (backup) {
+    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
-  ta = (ENTROPY_CONTEXT *) &t_above;
-  tl = (ENTROPY_CONTEXT *) &t_left;
+    ta = (ENTROPY_CONTEXT *) &t_above;
+    tl = (ENTROPY_CONTEXT *) &t_left;
+  } else {
+    ta = (ENTROPY_CONTEXT *)xd->above_context;
+    tl = (ENTROPY_CONTEXT *)xd->left_context;
+  }
 
   for (b = 16; b < 24; b += 4)
     cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV,
@@ -1757,13 +1911,14 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x) {
 }
 
 static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate,
-                                   int *distortion, int *skip) {
+                                   int *distortion, int *skip,
+                                   int backup) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
   vp9_transform_sbuv_16x16(x);
   vp9_quantize_sbuv_16x16(x);
 
-  *rate = rd_cost_sbuv_16x16(x);
+  *rate = rd_cost_sbuv_16x16(x, backup);
   *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024,
                                   xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2;
   *skip = vp9_sbuv_is_skippable_16x16(xd);
@@ -1783,7 +1938,7 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
     vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
                           usrc, vsrc, src_uv_stride,
                           udst, vdst, dst_uv_stride);
-    rd_inter32x32_uv_16x16(x, rate, distortion, skip);
+    rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1);
   } else {
 #endif
     int n, r = 0, d = 0;
@@ -1833,6 +1988,14 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
+
+static void super_block_64_uvrd(MACROBLOCK *x, int *rate,
+                                int *distortion, int *skip);
+static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+                                int *distortion, int fullpixel, int *skip) {
+  super_block_64_uvrd(x, rate, distortion, skip);
+  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+}
 #endif
 
 static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -1984,13 +2147,13 @@ static void super_block_uvrd(MACROBLOCK *x,
     vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
                           usrc, vsrc, src_uv_stride,
                           udst, vdst, dst_uv_stride);
-    rd_inter32x32_uv_16x16(x, rate, distortion, skippable);
+    rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1);
   } else {
 #endif
     int d = 0, r = 0, n, s = 1;
     ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
-    ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
-    ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+    ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context;
+    ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
 
     memcpy(t_above, xd->above_context, sizeof(t_above));
     memcpy(t_left, xd->left_context, sizeof(t_left));
@@ -2016,24 +2179,107 @@ static void super_block_uvrd(MACROBLOCK *x,
       }
 
       d += vp9_mbuverror(x) >> 2;
-      xd->above_context = ta + x_idx;
-      xd->left_context = tl + y_idx;
-      r += rd_cost_mbuv_8x8(x, 0);
+      xd->above_context = t_above + x_idx;
+      xd->left_context = t_left + y_idx;
+      if (mbmi->txfm_size == TX_4X4) {
+        r += rd_cost_mbuv_4x4(x, 0);
+      } else {
+        r += rd_cost_mbuv_8x8(x, 0);
+      }
     }
 
-    xd->above_context = ta;
-    xd->left_context = tl;
+    xd->above_context = ta_orig;
+    xd->left_context = tl_orig;
+
+    *distortion = d;
+    *rate = r;
+    *skippable = s;
+#if CONFIG_TX32X32
+  }
+#endif
+}
+
+static void super_block_64_uvrd(MACROBLOCK *x,
+                                int *rate,
+                                int *distortion,
+                                int *skippable) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+  const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+  const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+  ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context;
+  ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
+  int d = 0, r = 0, n, s = 1;
+
+  memcpy(t_above, xd->above_context, sizeof(t_above));
+  memcpy(t_left, xd->left_context, sizeof(t_left));
+
+#if CONFIG_TX32X32
+  if (mbmi->txfm_size == TX_32X32) {
+    int n;
+
+    *rate = 0;
+    for (n = 0; n < 4; n++) {
+      int x_idx = n & 1, y_idx = n >> 1;
+      int r_tmp, d_tmp, s_tmp;
+
+      vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+                            usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
+                            vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
+                            src_uv_stride,
+                            udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
+                            vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
+                            dst_uv_stride);
+      xd->above_context = t_above + x_idx * 2;
+      xd->left_context = t_left + y_idx * 2;
+      rd_inter32x32_uv_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
+      r += r_tmp;
+      d += d_tmp;
+      s = s && s_tmp;
+    }
+  } else {
+#endif
+    for (n = 0; n < 16; n++) {
+      int x_idx = n & 3, y_idx = n >> 2;
+
+      vp9_subtract_mbuv_s_c(x->src_diff,
+                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            src_uv_stride,
+                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            dst_uv_stride);
+      if (mbmi->txfm_size == TX_4X4) {
+        vp9_transform_mbuv_4x4(x);
+        vp9_quantize_mbuv_4x4(x);
+        s &= vp9_mbuv_is_skippable_4x4(xd);
+      } else {
+        vp9_transform_mbuv_8x8(x);
+        vp9_quantize_mbuv_8x8(x);
+        s &= vp9_mbuv_is_skippable_8x8(xd);
+      }
+
+      xd->above_context = t_above + x_idx;
+      xd->left_context = t_left + y_idx;
+      d += vp9_mbuverror(x) >> 2;
+      if (mbmi->txfm_size == TX_4X4) {
+        r += rd_cost_mbuv_4x4(x, 0);
+      } else {
+        r += rd_cost_mbuv_8x8(x, 0);
+      }
+    }
+#if CONFIG_TX32X32
+  }
+#endif
 
     *distortion = d;
     *rate = r;
     *skippable = s;
 
-    xd->left_context = tl;
-    xd->above_context = ta;
-    memcpy(xd->above_context, t_above, sizeof(t_above));
-    memcpy(xd->left_context, t_left, sizeof(t_left));
-#if CONFIG_TX32X32
-  }
-#endif
+  xd->left_context = tl_orig;
+  xd->above_context = ta_orig;
 }
 
 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi,
@@ -2072,6 +2318,45 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi,
 
   return best_rd;
 }
+
+#if CONFIG_SUPERBLOCKS64
+static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi,
+                                         MACROBLOCK *x,
+                                         int *rate,
+                                         int *rate_tokenonly,
+                                         int *distortion,
+                                         int *skippable) {
+  MB_PREDICTION_MODE mode;
+  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+  int64_t best_rd = INT64_MAX, this_rd;
+  int this_rate_tokenonly, this_rate;
+  int this_distortion, s;
+
+  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+    x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
+    vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);
+
+    super_block_64_uvrd(x, &this_rate_tokenonly,
+                        &this_distortion, &s);
+    this_rate = this_rate_tokenonly +
+                x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
+    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+    if (this_rd < best_rd) {
+      mode_selected = mode;
+      best_rd = this_rd;
+      *rate = this_rate;
+      *rate_tokenonly = this_rate_tokenonly;
+      *distortion = this_distortion;
+      *skippable = s;
+    }
+  }
+
+  x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
+
+  return best_rd;
+}
+#endif  // CONFIG_SUPERBLOCKS64
 #endif
 
 int vp9_cost_mv_ref(VP9_COMP *cpi,
@@ -3161,8 +3446,6 @@ static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x,
   *skippable = y_skippable && uv_skippable;
 }
 
-#define MIN(x,y) (((x)<(y))?(x):(y))
-#define MAX(x,y) (((x)>(y))?(x):(y))
 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                                int idx, MV_REFERENCE_FRAME frame_type,
                                int block_size,
@@ -3367,7 +3650,28 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 #endif
 
-  if (block_size == BLOCK_16X16) {
+#if CONFIG_SUPERBLOCKS
+#if CONFIG_SUPERBLOCKS64
+  if (block_size == BLOCK_64X64) {
+    vp9_build_inter64x64_predictors_sb(xd,
+                                       xd->dst.y_buffer,
+                                       xd->dst.u_buffer,
+                                       xd->dst.v_buffer,
+                                       xd->dst.y_stride,
+                                       xd->dst.uv_stride);
+  } else
+#endif  // CONFIG_SUPERBLOCKS64
+  if (block_size == BLOCK_32X32) {
+    vp9_build_inter32x32_predictors_sb(xd,
+                                       xd->dst.y_buffer,
+                                       xd->dst.u_buffer,
+                                       xd->dst.v_buffer,
+                                       xd->dst.y_stride,
+                                       xd->dst.uv_stride);
  } else
+#endif  // CONFIG_SUPERBLOCKS
+  {
+    assert(block_size == BLOCK_16X16);
     vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
     if (is_comp_pred)
       vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
@@ -3375,15 +3679,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     if (is_comp_interintra_pred) {
       vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
     }
-#endif
-  } else {
-#if CONFIG_SUPERBLOCKS
-    vp9_build_inter32x32_predictors_sb(xd,
-                                       xd->dst.y_buffer,
-                                       xd->dst.u_buffer,
-                                       xd->dst.v_buffer,
-                                       xd->dst.y_stride,
-                                       xd->dst.uv_stride);
 #endif
   }
 
@@ -3397,14 +3692,22 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     if (threshold < x->encode_breakout)
       threshold = x->encode_breakout;
 
-    if (block_size == BLOCK_16X16) {
-      var = vp9_variance16x16(*(b->base_src), b->src_stride,
-                              xd->predictor, 16, &sse);
-    } else {
 #if CONFIG_SUPERBLOCKS
+#if CONFIG_SUPERBLOCKS64
+    if (block_size == BLOCK_64X64) {
+      var = vp9_variance64x64(*(b->base_src), b->src_stride,
+                              xd->dst.y_buffer, xd->dst.y_stride, &sse);
+    } else
+#endif  // CONFIG_SUPERBLOCKS64
+    if (block_size == BLOCK_32X32) {
       var = vp9_variance32x32(*(b->base_src), b->src_stride,
                               xd->dst.y_buffer, xd->dst.y_stride, &sse);
-#endif
+    } else
+#endif  // CONFIG_SUPERBLOCK
+    {
+      assert(block_size == BLOCK_16X16);
+      var = vp9_variance16x16(*(b->base_src), b->src_stride,
+                              xd->predictor, 16, &sse);
     }
 
     if ((int)sse < threshold) {
@@ -3416,15 +3719,29 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         // Check u and v to make sure skip is ok
         int sse2;
 
-        if (block_size == BLOCK_16X16) {
-          sse2 = vp9_uvsse(x);
-        } else {
+#if CONFIG_SUPERBLOCKS
+#if CONFIG_SUPERBLOCKS64
+        if (block_size == BLOCK_64X64) {
+          unsigned int sse2u, sse2v;
+          var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride,
+                                  xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
+          var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride,
                                  xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
+          sse2 = sse2u + sse2v;
+        } else
+#endif  // CONFIG_SUPERBLOCKS64
+        if (block_size == BLOCK_32X32) {
           unsigned int sse2u, sse2v;
           var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
                                   xd->dst.u_buffer, xd->dst.uv_stride, &sse2u);
           var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
                                   xd->dst.v_buffer, xd->dst.uv_stride, &sse2v);
           sse2 = sse2u + sse2v;
+        } else
+#endif  // CONFIG_SUPERBLOCKS
+        {
+          assert(block_size == BLOCK_16X16);
+          sse2 = vp9_uvsse(x);
         }
 
         if (sse2 * 2 < threshold) {
@@ -3455,23 +3772,26 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (!x->skip) {
-    if (block_size == BLOCK_16X16) {
-
vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred) { - vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256], - &xd->predictor[320], 8); - } -#endif - inter_mode_cost(cpi, x, rate2, distortion, - rate_y, distortion_y, rate_uv, distortion_uv, - skippable, txfm_cache); - } else { #if CONFIG_SUPERBLOCKS +#if CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_64X64) { + int skippable_y, skippable_uv; + + // Y cost and distortion + super_block_64_yrd(cpi, x, rate_y, distortion_y, + &skippable_y, txfm_cache); + *rate2 += *rate_y; + *distortion += *distortion_y; + + rd_inter64x64_uv(cpi, x, rate_uv, distortion_uv, + cm->full_pixel, &skippable_uv); + + *rate2 += *rate_uv; + *distortion += *distortion_uv; + *skippable = skippable_y && skippable_uv; + } else +#endif // CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_32X32) { int skippable_y, skippable_uv; // Y cost and distortion @@ -3486,7 +3806,25 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += *rate_uv; *distortion += *distortion_uv; *skippable = skippable_y && skippable_uv; + } else +#endif // CONFIG_SUPERBLOCKS + { + assert(block_size == BLOCK_16X16); + + vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256], + &xd->predictor[320], 8); + if (is_comp_pred) + vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256], + &xd->predictor[320], 8); +#if CONFIG_COMP_INTERINTRA_PRED + if (is_comp_interintra_pred) { + vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256], + &xd->predictor[320], 8); + } #endif + inter_mode_cost(cpi, x, rate2, distortion, + rate_y, distortion_y, rate_uv, distortion_uv, + skippable, txfm_cache); } } return this_rd; // if 0, this will be re-calculated by caller @@ -3554,7 +3892,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vpx_memset(&frame_mv, 0, sizeof(frame_mv)); vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); - vpx_memset(&x->mb_context[xd->mb_index], 0, sizeof(PICK_MODE_CONTEXT)); + vpx_memset(&x->mb_context[xd->sb_index][xd->mb_index], 0, + sizeof(PICK_MODE_CONTEXT)); for (i = 0; i < MAX_REF_FRAMES; i++) frame_mv[NEWMV][i].as_int = INVALID_MV; @@ -3787,7 +4126,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_COMP_INTRA_PRED 0, #endif - 0); + cpi->update_context); rate2 += rate; distortion2 += distortion; @@ -4298,18 +4637,18 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } end: - store_coding_context( - x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition, - &mbmi->ref_mvs[mbmi->ref_frame][0], - &mbmi->ref_mvs[mbmi->second_ref_frame < 0 - ? 0 : mbmi->second_ref_frame][0], - best_pred_diff, best_txfm_diff); + store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index], + best_mode_index, &best_partition, + &mbmi->ref_mvs[mbmi->ref_frame][0], + &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 
0 : + mbmi->second_ref_frame][0], + best_pred_diff, best_txfm_diff); } #if CONFIG_SUPERBLOCKS -void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, - int *returndist) { +void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, + int *returnrate, + int *returndist) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; int rate_y, rate_uv; @@ -4335,6 +4674,37 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returndist = dist_y + (dist_uv >> 2); } } + +#if CONFIG_SUPERBLOCKS64 +void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, + int *returnrate, + int *returndist) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + int rate_y, rate_uv; + int rate_y_tokenonly, rate_uv_tokenonly; + int error_y, error_uv; + int dist_y, dist_uv; + int y_skip, uv_skip; + int64_t txfm_cache[NB_TXFM_MODES]; + + error_y = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, + &dist_y, &y_skip, txfm_cache); + error_uv = rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + &dist_uv, &uv_skip); + + if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { + *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); + *returndist = dist_y + (dist_uv >> 2); + } else { + *returnrate = rate_y + rate_uv; + if (cm->mb_no_coeff_skip) + *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + *returndist = dist_y + (dist_uv >> 2); + } +} +#endif #endif void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -4409,11 +4779,12 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_COMP_INTRA_PRED 0, #endif - 0); + cpi->update_context); #if CONFIG_COMP_INTRA_PRED error4x4d = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4d, &rate4x4_tokenonly, - &dist4x4d, error16x16, 1, 0); + &dist4x4d, error16x16, 1, + cpi->update_context); #endif mbmi->mb_skip_coeff = 0; @@ -4426,8 +4797,8 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); dist = dist16x16 + (distuv8x8 >> 2); mbmi->txfm_size = txfm_size_16x16; - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); + memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0, + sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff)); } else if (error8x8 > error16x16) { if (error4x4 < error16x16) { rate = rateuv; @@ -4444,15 +4815,16 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mode = B_PRED; mbmi->txfm_size = TX_4X4; dist = dist4x4 + (distuv >> 2); - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); + memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0, + sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff)); } else { mbmi->txfm_size = txfm_size_16x16; mbmi->mode = mode16x16; rate = rate16x16 + rateuv8x8; dist = dist16x16 + (distuv8x8 >> 2); for (i = 0; i < NB_TXFM_MODES; i++) { - x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i]; + x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] = + error16x16 - txfm_cache[i]; } } if (cpi->common.mb_no_coeff_skip) @@ -4473,8 +4845,8 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mode = B_PRED; mbmi->txfm_size = TX_4X4; dist = dist4x4 + (distuv >> 2); - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); + 
memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0, + sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff)); } else { // FIXME(rbultje) support transform-size selection mbmi->mode = I8X8_PRED; @@ -4482,8 +4854,8 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, set_i8x8_block_modes(x, mode8x8); rate = rate8x8 + rateuv; dist = dist8x8 + (distuv >> 2); - memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0, - sizeof(x->mb_context[xd->mb_index].txfm_rd_diff)); + memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0, + sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff)); } if (cpi->common.mb_no_coeff_skip) rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); @@ -4494,9 +4866,11 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, } #if CONFIG_SUPERBLOCKS -int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion) { +static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, + int *returndistortion, + int block_size) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; @@ -4556,7 +4930,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, BLOCK_32X32, + setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size, recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV], frame_mv[NEARMV], frame_mdcounts, y_buffer, u_buffer, v_buffer); @@ -4565,27 +4939,56 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - mbmi->mode = DC_PRED; - if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { - mbmi->txfm_size = TX_4X4; - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4, - &dist_uv_4x4, &uv_skip_4x4); - mode_uv_4x4 = mbmi->uv_mode; - } - if (cm->txfm_mode != ONLY_4X4) { - mbmi->txfm_size = TX_8X8; - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, - &dist_uv_8x8, &uv_skip_8x8); - mode_uv_8x8 = mbmi->uv_mode; - } +#if CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_64X64) { + mbmi->mode = DC_PRED; + if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { + mbmi->txfm_size = TX_4X4; + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4, + &dist_uv_4x4, &uv_skip_4x4); + mode_uv_4x4 = mbmi->uv_mode; + } + if (cm->txfm_mode != ONLY_4X4) { + mbmi->txfm_size = TX_8X8; + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, + &dist_uv_8x8, &uv_skip_8x8); + mode_uv_8x8 = mbmi->uv_mode; + } #if CONFIG_TX32X32 - if (cm->txfm_mode >= ALLOW_32X32) { - mbmi->txfm_size = TX_32X32; - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16, - &dist_uv_16x16, &uv_skip_16x16); - mode_uv_16x16 = mbmi->uv_mode; + if (cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_16x16, + &rate_uv_tokenonly_16x16, + &dist_uv_16x16, &uv_skip_16x16); + mode_uv_16x16 = mbmi->uv_mode; + } +#endif // CONFIG_TX32X32 + } else +#endif // CONFIG_SUPERBLOCKS64 + { + assert(block_size == BLOCK_32X32); + mbmi->mode = DC_PRED; + if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { + mbmi->txfm_size = TX_4X4; + 
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4, + &dist_uv_4x4, &uv_skip_4x4); + mode_uv_4x4 = mbmi->uv_mode; + } + if (cm->txfm_mode != ONLY_4X4) { + mbmi->txfm_size = TX_8X8; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8, + &dist_uv_8x8, &uv_skip_8x8); + mode_uv_8x8 = mbmi->uv_mode; + } +#if CONFIG_TX32X32 + if (cm->txfm_mode >= ALLOW_32X32) { + mbmi->txfm_size = TX_32X32; + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16, + &dist_uv_16x16, &uv_skip_16x16); + mode_uv_16x16 = mbmi->uv_mode; + } +#endif // CONFIG_TX32X32 } -#endif for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { @@ -4713,9 +5116,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (ref_frame == INTRA_FRAME) { - vp9_build_intra_predictors_sby_s(xd); - super_block_yrd(cpi, x, &rate_y, &distortion_y, - &skippable, txfm_cache); +#if CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_64X64) { + vp9_build_intra_predictors_sb64y_s(xd); + super_block_64_yrd(cpi, x, &rate_y, &distortion_y, + &skippable, txfm_cache); + } else +#endif // CONFIG_SUPERBLOCKS64 + { + assert(block_size == BLOCK_32X32); + vp9_build_intra_predictors_sby_s(xd); + super_block_yrd(cpi, x, &rate_y, &distortion_y, + &skippable, txfm_cache); + } if (mbmi->txfm_size == TX_4X4) { rate_uv = rate_uv_4x4; distortion_uv = dist_uv_4x4; @@ -4727,7 +5140,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion_uv = dist_uv_16x16; skippable = skippable && uv_skip_16x16; mbmi->uv_mode = mode_uv_16x16; -#endif +#endif // CONFIG_TX32X32 } else { rate_uv = rate_uv_8x8; distortion_uv = dist_uv_8x8; @@ -4749,7 +5162,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #endif } #endif - this_rd = handle_inter_mode(cpi, x, BLOCK_32X32, + this_rd = handle_inter_mode(cpi, x, block_size, &saddone, near_sadidx, mdcounts, txfm_cache, &rate2, &distortion2, &skippable, &compmode_cost, @@ -5021,14 +5434,41 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } end: - store_coding_context(x, &x->sb_context[0], best_mode_index, NULL, - &mbmi->ref_mvs[mbmi->ref_frame][0], - &mbmi->ref_mvs[mbmi->second_ref_frame < 0 - ? 0 : mbmi->second_ref_frame][0], - best_pred_diff, best_txfm_diff); + { +#if CONFIG_SUPERBLOCKS64 + PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ? + &x->sb32_context[xd->sb_index] : + &x->sb64_context; +#else + PICK_MODE_CONTEXT *p = &x->sb32_context[xd->sb_index]; +#endif + store_coding_context(x, p, best_mode_index, NULL, + &mbmi->ref_mvs[mbmi->ref_frame][0], + &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 
0 : + mbmi->second_ref_frame][0], + best_pred_diff, best_txfm_diff); + } return best_rd; } + +int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, + int *returndistortion) { + return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset, + returnrate, returndistortion, BLOCK_32X32); +} + +#if CONFIG_SUPERBLOCKS64 +int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, + int recon_yoffset, int recon_uvoffset, + int *returnrate, + int *returndistortion) { + return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset, + returnrate, returndistortion, BLOCK_64X64); +} +#endif // CONFIG_SUPERBLOCKS64 #endif void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, @@ -5063,8 +5503,8 @@ void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, // vp9_pick_inter_mode // Store metrics so they can be added in to totals if this mode is picked - x->mb_context[xd->mb_index].distortion = distortion; - x->mb_context[xd->mb_index].intra_error = intra_error; + x->mb_context[xd->sb_index][xd->mb_index].distortion = distortion; + x->mb_context[xd->sb_index][xd->mb_index].intra_error = intra_error; *totalrate = rate; *totaldist = distortion; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 4c2c33a74..8ee2c0bf9 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -22,16 +22,23 @@ extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex); extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int *r, int *d); -extern void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); +extern void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, + int *r, int *d); + +extern void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, + int *r, int *d); extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, - int recon_uvoffset, int *r, int *d); + int ref_yoffset, int ref_uvoffset, + int *r, int *d); -extern int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndist); +extern int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, + int ref_yoffset, int ref_uvoffset, + int *r, int *d); + +extern int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, + int ref_yoffset, int ref_uvoffset, + int *r, int *d); extern void vp9_init_me_luts(); diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index e5249e537..9ce27fbed 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@ -14,6 +14,14 @@ #include "vpx_ports/config.h" #include "vpx/vpx_integer.h" +unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64); +} + unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, @@ -64,6 +72,19 @@ unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4); } +void vp9_sad64x64x3_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr, ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 2, 
ref_stride, 0x7fffffff); +} + void vp9_sad32x32x3_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, @@ -77,6 +98,37 @@ void vp9_sad32x32x3_c(const uint8_t *src_ptr, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp9_sad64x64x8_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + uint16_t *sad_array) { + sad_array[0] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr, ref_stride, + 0x7fffffff); + sad_array[1] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 1, ref_stride, + 0x7fffffff); + sad_array[2] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 2, ref_stride, + 0x7fffffff); + sad_array[3] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 3, ref_stride, + 0x7fffffff); + sad_array[4] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 4, ref_stride, + 0x7fffffff); + sad_array[5] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 5, ref_stride, + 0x7fffffff); + sad_array[6] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 6, ref_stride, + 0x7fffffff); + sad_array[7] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr + 7, ref_stride, + 0x7fffffff); +} + void vp9_sad32x32x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, @@ -328,6 +380,21 @@ void vp9_sad4x4x8_c(const uint8_t *src_ptr, 0x7fffffff); } +void vp9_sad64x64x4d_c(const uint8_t *src_ptr, + int src_stride, + uint8_t *ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad64x64_c(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int src_stride, uint8_t *ref_ptr[], diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index ee90f4fc3..19529fcbe 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -141,21 +141,57 @@ static int cost_segmap(MACROBLOCKD *xd, segcounts[3] * vp9_cost_one(probs[2]); return cost; +} +static void count_segs(VP9_COMP *cpi, + MODE_INFO *mi, + int *no_pred_segcounts, + int (*temporal_predictor_count)[2], + int *t_unpred_seg_counts, + int mb_size, int mb_row, int mb_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int segmap_index = mb_row * cm->mb_cols + mb_col; + const int segment_id = mi->mbmi.segment_id; + + xd->mode_info_context = mi; + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - mb_size - mb_row) * 16) << 3; + xd->mb_to_right_edge = ((cm->mb_cols - mb_size - mb_col) * 16) << 3; + + // Count the number of hits on each segment with no prediction + no_pred_segcounts[segment_id]++; + + // Temporal prediction not allowed on key frames + if (cm->frame_type != KEY_FRAME) { + // Test to see if the segment id matches the predicted value. 
+ const int seg_predicted = + (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index)); + + // Get the segment id prediction context + const int pred_context = vp9_get_pred_context(cm, xd, PRED_SEG_ID); + + // Store the prediction status for this mb and update counts + // as appropriate + vp9_set_pred_flag(xd, PRED_SEG_ID, seg_predicted); + temporal_predictor_count[pred_context][seg_predicted]++; + + if (!seg_predicted) + // Update the "unpredicted" segment count + t_unpred_seg_counts[segment_id]++; + } } void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - int i; int no_pred_cost; int t_pred_cost = INT_MAX; - int pred_context; + int i; int mb_row, mb_col; - int segmap_index = 0; - unsigned char segment_id; int temporal_predictor_count[PREDICTION_PROBS][2]; int no_pred_segcounts[MAX_MB_SEGMENTS]; @@ -165,9 +201,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; -#if CONFIG_SUPERBLOCKS const int mis = cm->mode_info_stride; -#endif + MODE_INFO *mi_ptr = cm->mi, *mi; // Set default state for the segment tree probabilities and the // temporal coding probabilities @@ -183,87 +218,57 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // First of all generate stats regarding how well the last segment map // predicts this one - // Initialize macroblock decoder mode info context for the first mb - // in the frame - xd->mode_info_context = cm->mi; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 2) { - for (i = 0; i < 4; i++) { - static const int dx[4] = { +1, -1, +1, +1 }; - static const int dy[4] = { 0, +1, 0, -1 }; - int x_idx = i & 1, y_idx = i >> 1; - - if (mb_col + x_idx >= cm->mb_cols || - mb_row + y_idx >= cm->mb_rows) { - goto end; - } - - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - - segmap_index = (mb_row + y_idx) * cm->mb_cols + mb_col + x_idx; - segment_id = xd->mode_info_context->mbmi.segment_id; + for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { + mi = mi_ptr; + for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) { +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 4, mb_row, mb_col); + } else +#endif + { + for (i = 0; i < 4; i++) { + int x_idx = (i & 1) << 1, y_idx = i & 2; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (mb_col + 1 < cm->mb_cols) - segment_id = segment_id && - xd->mode_info_context[1].mbmi.segment_id; - if (mb_row + 1 < cm->mb_rows) { - segment_id = segment_id && - xd->mode_info_context[mis].mbmi.segment_id; - if (mb_col + 1 < cm->mb_cols) - segment_id = segment_id && - xd->mode_info_context[mis + 1].mbmi.segment_id; + MODE_INFO *sb_mi = mi + y_idx * mis + x_idx; +#endif + + if (mb_col + x_idx >= cm->mb_cols || + mb_row + y_idx >= cm->mb_rows) { + continue; } - xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3; - } else { -#endif - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; -#if CONFIG_SUPERBLOCKS - } -#endif - - // Count the number of hits on each segment with no prediction - no_pred_segcounts[segment_id]++; - - // 
Temporal prediction not allowed on key frames - if (cm->frame_type != KEY_FRAME) { - // Test to see if the segment id matches the predicted value. - int seg_predicted = - (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index)); - - // Get the segment id prediction context - pred_context = - vp9_get_pred_context(cm, xd, PRED_SEG_ID); - - // Store the prediction status for this mb and update counts - // as appropriate - vp9_set_pred_flag(xd, PRED_SEG_ID, seg_predicted); - temporal_predictor_count[pred_context][seg_predicted]++; - - if (!seg_predicted) - // Update the "unpredicted" segment count - t_unpred_seg_counts[segment_id]++; - } #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - assert(!i); - xd->mode_info_context += 2; - break; - } + if (sb_mi->mbmi.sb_type) { + assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32); + count_segs(cpi, sb_mi, no_pred_segcounts, temporal_predictor_count, + t_unpred_seg_counts, 2, mb_row + y_idx, mb_col + x_idx); + } else #endif - end: - xd->mode_info_context += dx[i] + dy[i] * cm->mode_info_stride; + { + int j; + + for (j = 0; j < 4; j++) { + const int x_idx_mb = x_idx + (j & 1), y_idx_mb = y_idx + (j >> 1); + MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis; + + if (mb_col + x_idx_mb >= cm->mb_cols || + mb_row + y_idx_mb >= cm->mb_rows) { + continue; + } + +#if CONFIG_SUPERBLOCKS + assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16); +#endif + count_segs(cpi, mb_mi, no_pred_segcounts, + temporal_predictor_count, t_unpred_seg_counts, + 1, mb_row + y_idx_mb, mb_col + x_idx_mb); + } + } + } } } - - // this is to account for the border in mode_info_context - xd->mode_info_context -= mb_col; - xd->mode_info_context += cm->mode_info_stride * 2; } // Work out probability tree for coding segments without prediction diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index ecb92572b..9060d4c74 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -25,6 +25,19 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { } #if CONFIG_SUPERBLOCKS +unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg); + *sse = var; + return (var - (((int64_t)avg * avg) >> 12)); +} + unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -185,6 +198,27 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, } #if CONFIG_SUPERBLOCKS +unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t FData3[65 * 64]; // Temp data bufffer used in filtering + uint8_t temp2[68 * 64]; + const int16_t *HFilter, *VFilter; + + HFilter = vp9_bilinear_filters[xoffset]; + VFilter = vp9_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, + 1, 65, 64, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter); + + return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); +} + unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -224,6 +258,15 @@ unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, 
ref_ptr, recon_stride, sse); } + +unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, + ref_ptr, recon_stride, sse); +} #endif @@ -245,6 +288,15 @@ unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, ref_ptr, recon_stride, sse); } + +unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, + ref_ptr, recon_stride, sse); +} #endif unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, @@ -265,6 +317,15 @@ unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, ref_ptr, recon_stride, sse); } + +unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, + ref_ptr, recon_stride, sse); +} #endif unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, @@ -293,6 +354,19 @@ unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, dst_pixels_per_line, sse); return *sse; } + +unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, + xoffset, yoffset, dst_ptr, + dst_pixels_per_line, sse); + return *sse; +} #endif unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, From e6216d163ace0e1888de9caa409ec6f93aeb96fb Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 4 Jan 2013 12:05:40 -0800 Subject: [PATCH 46/77] Don't use tx32x32 for macroblocks. Change-Id: Ib674e0153ca360867ab7a20ba291ac9171a01250 --- vp9/encoder/vp9_rdopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a79cb5aad..5eea50779 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -4577,7 +4577,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (cpi->oxcf.arnr_max_frames == 0) && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { mbmi->mode = ZEROMV; - if (cm->txfm_mode != TX_MODE_SELECT) + if (cm->txfm_mode <= ALLOW_8X8) mbmi->txfm_size = cm->txfm_mode; else mbmi->txfm_size = TX_16X16; From c13d9fef42c91fc7bf579b8a2cf8891d5f07f17f Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 7 Jan 2013 11:02:14 -0800 Subject: [PATCH 47/77] Re-enable support for static_threshold (encode_breakout). 
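 The essence of this change is that the skip decision taken during RD mode selection has to survive into the second (encode) pass instead of being reset. As a hedged, standalone illustration (the stub types and main() here are mine, not the encoder's real definitions; only the ctx->skip / x->skip round trip mirrors the diff below):

    #include <stdio.h>

    typedef struct { int skip; } pick_mode_context;  /* stand-in for PICK_MODE_CONTEXT */
    typedef struct { int skip; } macroblock;         /* stand-in for MACROBLOCK */

    /* mirrors store_coding_context(): snapshot the breakout decision
     * together with the rest of the chosen mode */
    static void store_skip(pick_mode_context *ctx, const macroblock *x) {
      ctx->skip = x->skip;
    }

    /* mirrors update_state(): reapply the decision before encoding */
    static void restore_skip(macroblock *x, const pick_mode_context *ctx) {
      x->skip = ctx->skip;
    }

    int main(void) {
      macroblock x = { 0 };
      pick_mode_context ctx = { 0 };

      x.skip = 1;             /* encode_breakout fired during the RD search */
      store_skip(&ctx, &x);
      x.skip = 0;             /* x is reused between search and encode */
      restore_skip(&x, &ctx);
      printf("skip restored: %d\n", x.skip);  /* prints 1 */
      return 0;
    }

 This is why the diff adds an int skip member to PICK_MODE_CONTEXT and drops the unconditional x->skip = 0 at the top of the encode_* functions.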
Change-Id: Ibd7380f478d3127f9db91d0a4fd2fd0dfde961ab --- vp9/encoder/vp9_block.h | 1 + vp9/encoder/vp9_encodeframe.c | 166 +++++++++++++++++----------------- vp9/encoder/vp9_rdopt.c | 1 + 3 files changed, 83 insertions(+), 85 deletions(-) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index e8f6f46b0..94078970b 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -76,6 +76,7 @@ typedef struct { typedef struct { MODE_INFO mic; PARTITION_INFO partition_info; + int skip; int_mv best_ref_mv; int_mv second_best_ref_mv; int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 219295052..5a84fe997 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -493,6 +493,7 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; } + x->skip = ctx->skip; if (!output_enabled) return; @@ -2125,7 +2126,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, int seg_ref_active; unsigned char ref_pred_flag; - x->skip = 0; #if CONFIG_SUPERBLOCKS assert(!xd->mode_info_context->mbmi.sb_type); #endif @@ -2178,7 +2178,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); } - assert(mbmi->txfm_size <= TX_16X16); if (mbmi->ref_frame == INTRA_FRAME) { #ifdef ENC_DEBUG if (enc_debug) { @@ -2356,6 +2355,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, !((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) || (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) && vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) { + assert(mbmi->txfm_size <= TX_16X16); if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV) { cpi->txfm_count_16x16p[mbmi->txfm_size]++; @@ -2403,8 +2403,6 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, ENTROPY_CONTEXT_PLANES ta[4], tl[4]; const int mis = cm->mode_info_stride; - x->skip = 0; - if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { adjust_act_zbin(cpi, x); @@ -2494,22 +2492,22 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, #if CONFIG_TX32X32 if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, - dst, dst_y_stride); - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, - usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - // TODO(rbultje): trellis optimize - vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); - vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); - vp9_recon_sby_s_c(&x->e_mbd, dst); - vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst); - if (!x->skip) { + vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, + dst, dst_y_stride); + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + vp9_transform_sby_32x32(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_32x32(x); + vp9_quantize_sbuv_16x16(x); + // TODO(rbultje): trellis optimize + vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); + vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); + vp9_recon_sby_s_c(&x->e_mbd, dst); + vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst); + vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); } else { int mb_skip_context = @@ 
-2551,26 +2549,26 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, tp[n] = *t; xd->mode_info_context = mi + x_idx + y_idx * mis; - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - if (!x->skip) { + vp9_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + vp9_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + vp9_fidct_mb(x); + vp9_recon_mby_s_c(&x->e_mbd, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp9_recon_mbuv_s_c(&x->e_mbd, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; } else { @@ -2650,8 +2648,6 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, ENTROPY_CONTEXT_PLANES ta[16], tl[16]; const int mis = cm->mode_info_stride; - x->skip = 0; - if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { adjust_act_zbin(cpi, x); @@ -2756,33 +2752,33 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2); tp[n] = *t; xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2; - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, - src + x_idx * 32 + y_idx * 32 * src_y_stride, - src_y_stride, - dst + x_idx * 32 + y_idx * 32 * dst_y_stride, - dst_y_stride); - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, - usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - src_uv_stride, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - dst_uv_stride); - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - // TODO(rbultje): trellis optimize - vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); - vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); - vp9_recon_sby_s_c(&x->e_mbd, - dst + 32 * x_idx + 32 * y_idx * dst_y_stride, - dst_y_stride); - vp9_recon_sbuv_s_c(&x->e_mbd, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride); - if (!x->skip) { + vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, + src + x_idx * 32 + y_idx * 32 * src_y_stride, + src_y_stride, + dst + x_idx * 32 + y_idx * 32 * dst_y_stride, + dst_y_stride); + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, + vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, + src_uv_stride, + udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + dst_uv_stride); + 
vp9_transform_sby_32x32(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_32x32(x); + vp9_quantize_sbuv_16x16(x); + // TODO(rbultje): trellis optimize + vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); + vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); + vp9_recon_sby_s_c(&x->e_mbd, + dst + 32 * x_idx + 32 * y_idx * dst_y_stride, + dst_y_stride); + vp9_recon_sbuv_s_c(&x->e_mbd, + udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride); + vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); } else { int mb_skip_context = cpi->common.mb_no_coeff_skip ? @@ -2827,26 +2823,26 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, tp[n] = *t; xd->mode_info_context = mi + x_idx + y_idx * mis; - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - if (!x->skip) { + vp9_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + vp9_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + vp9_fidct_mb(x); + vp9_recon_mby_s_c(&x->e_mbd, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp9_recon_mbuv_s_c(&x->e_mbd, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; } else { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5eea50779..267dd0aa5 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3403,6 +3403,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // Take a snapshot of the coding context so it can be // restored if we decide to encode this way + ctx->skip = x->skip; ctx->best_mode_index = mode_index; vpx_memcpy(&ctx->mic, xd->mode_info_context, sizeof(MODE_INFO)); From 3ed14846e1161b348c14dc3fc8867c87a9d077fc Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 7 Jan 2013 11:41:49 -0800 Subject: [PATCH 48/77] Remove a few redundant function arguments in encodeframe.c. Also reindent a block of code that was misindented after addition of the tx32x32 experiment. 
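 A note on the pattern, since it recurs in every hunk below: cm, x and xd are all reachable from cpi, so they are dropped from the parameter lists and derived locally instead. A minimal sketch with stub types (illustrative stand-ins, not the real encoder definitions):

    typedef struct { int mb_rows, mb_cols; } common_stub;          /* VP9_COMMON */
    typedef struct { int sb_index, mb_index; } mbd_stub;           /* MACROBLOCKD */
    typedef struct { mbd_stub e_mbd; } mb_stub;                    /* MACROBLOCK */
    typedef struct { common_stub common; mb_stub mb; } comp_stub;  /* VP9_COMP */

    static void pick_mb_modes_sketch(comp_stub *cpi, int mb_row, int mb_col) {
      common_stub *const cm = &cpi->common;  /* formerly a separate argument */
      mb_stub *const x = &cpi->mb;           /* formerly a separate argument */
      mbd_stub *const xd = &x->e_mbd;        /* formerly a separate argument */
      (void)cm; (void)xd; (void)mb_row; (void)mb_col;  /* body elided */
    }

 Besides shortening the call sites, this removes the possibility of passing a cm/x/xd triple that disagrees with cpi.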
Change-Id: Ic3e4aae3effd8a40136da68c9f382af03632ba08 --- vp9/encoder/vp9_encodeframe.c | 180 +++++++++++++++++----------------- 1 file changed, 90 insertions(+), 90 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5a84fe997..702c35831 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -433,11 +433,12 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, } #endif -static void update_state(VP9_COMP *cpi, MACROBLOCK *x, +static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int block_size, int output_enabled) { int i, x_idx, y; - MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = &ctx->mic; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int mb_mode = mi->mbmi.mode; @@ -748,14 +749,14 @@ static void set_offsets(VP9_COMP *cpi, } static void pick_mb_modes(VP9_COMP *cpi, - VP9_COMMON *cm, int mb_row, int mb_col, - MACROBLOCK *x, - MACROBLOCKD *xd, TOKENEXTRA **tp, int *totalrate, int *totaldist) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int i; int recon_yoffset, recon_uvoffset; ENTROPY_CONTEXT_PLANES left_context[2]; @@ -870,14 +871,14 @@ static void pick_mb_modes(VP9_COMP *cpi, #if CONFIG_SUPERBLOCKS static void pick_sb_modes(VP9_COMP *cpi, - VP9_COMMON *cm, int mb_row, int mb_col, - MACROBLOCK *x, - MACROBLOCKD *xd, TOKENEXTRA **tp, int *totalrate, int *totaldist) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int recon_yoffset, recon_uvoffset; set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset); @@ -907,14 +908,14 @@ static void pick_sb_modes(VP9_COMP *cpi, #if CONFIG_SUPERBLOCKS64 static void pick_sb64_modes(VP9_COMP *cpi, - VP9_COMMON *cm, int mb_row, int mb_col, - MACROBLOCK *x, - MACROBLOCKD *xd, TOKENEXTRA **tp, int *totalrate, int *totaldist) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int recon_yoffset, recon_uvoffset; set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset); @@ -1001,20 +1002,20 @@ static void update_stats(VP9_COMP *cpi) { } static void encode_sb(VP9_COMP *cpi, - VP9_COMMON *cm, int mb_row, int mb_col, int output_enabled, - MACROBLOCK *x, - MACROBLOCKD *xd, TOKENEXTRA **tp, int is_sb) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int recon_yoffset, recon_uvoffset; #if CONFIG_SUPERBLOCKS cpi->sb32_count[is_sb]++; if (is_sb) { set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset); - update_state(cpi, x, &x->sb32_context[xd->sb_index], 32, output_enabled); + update_state(cpi, &x->sb32_context[xd->sb_index], 32, output_enabled); encode_superblock32(cpi, tp, recon_yoffset, recon_uvoffset, output_enabled, mb_row, mb_col); @@ -1043,7 +1044,7 @@ static void encode_sb(VP9_COMP *cpi, set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16, &recon_yoffset, &recon_uvoffset); xd->mb_index = i; - update_state(cpi, x, &x->mb_context[xd->sb_index][i], 16, output_enabled); + update_state(cpi, &x->mb_context[xd->sb_index][i], 16, output_enabled); #if !CONFIG_SUPERBLOCKS // Copy current MB to a work buffer @@ -1082,18 +1083,19 @@ static void encode_sb(VP9_COMP *cpi, #if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 static void encode_sb64(VP9_COMP *cpi, - VP9_COMMON *cm, int mb_row, int mb_col, - MACROBLOCK *x, - MACROBLOCKD 
*xd, TOKENEXTRA **tp, int is_sb[4]) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + cpi->sb64_count[is_sb[0] == 2]++; if (is_sb[0] == 2) { int recon_yoffset, recon_uvoffset; set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset); - update_state(cpi, x, &x->sb64_context, 64, 1); + update_state(cpi, &x->sb64_context, 64, 1); encode_superblock64(cpi, tp, recon_yoffset, recon_uvoffset, 1, mb_row, mb_col); update_stats(cpi); @@ -1114,7 +1116,7 @@ static void encode_sb64(VP9_COMP *cpi, continue; } xd->sb_index = i; - encode_sb(cpi, cm, mb_row + 2 * y_idx, mb_col + 2 * x_idx, 1, x, xd, tp, + encode_sb(cpi, mb_row + 2 * y_idx, mb_col + 2 * x_idx, 1, tp, is_sb[i]); } } @@ -1122,12 +1124,12 @@ static void encode_sb64(VP9_COMP *cpi, #endif // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 static void encode_sb_row(VP9_COMP *cpi, - VP9_COMMON *cm, int mb_row, - MACROBLOCK *x, - MACROBLOCKD *xd, TOKENEXTRA **tp, int *totalrate) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; int mb_col; int mb_cols = cm->mb_cols; @@ -1159,8 +1161,8 @@ static void encode_sb_row(VP9_COMP *cpi, xd->sb_index = i; - pick_mb_modes(cpi, cm, mb_row + y_idx, mb_col + x_idx, - x, xd, tp, &mb_rate, &mb_dist); + pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx, + tp, &mb_rate, &mb_dist); #if CONFIG_SUPERBLOCKS mb_rate += vp9_cost_bit(cm->sb32_coded, 0); #endif @@ -1169,8 +1171,8 @@ static void encode_sb_row(VP9_COMP *cpi, if (!((( mb_cols & 1) && mb_col + x_idx == mb_cols - 1) || ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) { /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ - pick_sb_modes(cpi, cm, mb_row + y_idx, mb_col + x_idx, - x, xd, tp, &sb_rate, &sb_dist); + pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx, + tp, &sb_rate, &sb_dist); sb_rate += vp9_cost_bit(cm->sb32_coded, 1); } @@ -1197,9 +1199,9 @@ static void encode_sb_row(VP9_COMP *cpi, // pixels of the lower level; also, inverting SB/MB order (big->small // instead of small->big) means we can use as threshold for small, which // may enable breakouts if RD is not good enough (i.e. 
faster) - encode_sb(cpi, cm, mb_row + y_idx, mb_col + x_idx, + encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, !(CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64), - x, xd, tp, is_sb[i]); + tp, is_sb[i]); } #if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 @@ -1209,8 +1211,7 @@ static void encode_sb_row(VP9_COMP *cpi, if (!((( mb_cols & 3) && mb_col + 3 >= mb_cols) || ((cm->mb_rows & 3) && mb_row + 3 >= cm->mb_rows))) { - pick_sb64_modes(cpi, cm, mb_row, mb_col, - x, xd, tp, &sb64_rate, &sb64_dist); + pick_sb64_modes(cpi, mb_row, mb_col, tp, &sb64_rate, &sb64_dist); sb64_rate += vp9_cost_bit(cm->sb64_coded, 1); } @@ -1228,7 +1229,7 @@ static void encode_sb_row(VP9_COMP *cpi, #if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 assert(tp_orig == *tp); - encode_sb64(cpi, cm, mb_row, mb_col, x, xd, tp, is_sb); + encode_sb64(cpi, mb_row, mb_col, tp, is_sb); assert(tp_orig < *tp); #endif // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 } @@ -1398,7 +1399,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { { // For each row of SBs in the frame for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) { - encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate); + encode_sb_row(cpi, mb_row, &tp, &totalrate); } cpi->tok_count = (unsigned int)(tp - cpi->tok); @@ -1944,13 +1945,12 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #if CONFIG_SUPERBLOCKS static void update_sb_skip_coeff_state(VP9_COMP *cpi, - MACROBLOCK *x, ENTROPY_CONTEXT_PLANES ta[4], ENTROPY_CONTEXT_PLANES tl[4], TOKENEXTRA *t[4], TOKENEXTRA **tp, - int skip[4], int output_enabled) -{ + int skip[4], int output_enabled) { + MACROBLOCK *const x = &cpi->mb; TOKENEXTRA tokens[4][16 * 25]; int n_tokens[4], n; @@ -2003,12 +2003,13 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, #if CONFIG_SUPERBLOCKS64 static void update_sb64_skip_coeff_state(VP9_COMP *cpi, - MACROBLOCK *x, ENTROPY_CONTEXT_PLANES ta[16], ENTROPY_CONTEXT_PLANES tl[16], TOKENEXTRA *t[16], TOKENEXTRA **tp, int skip[16], int output_enabled) { + MACROBLOCK *const x = &cpi->mb; + if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) { TOKENEXTRA tokens[4][1024+512]; int n_tokens[4], n; @@ -2537,66 +2538,65 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff; - } else { + } else #endif - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; + { + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; - xd->left_context = cm->left_context + y_idx + (mb_row & 2); - xd->above_context = cm->above_context + mb_col + x_idx; - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * mis; + xd->left_context = cm->left_context + y_idx + (mb_row & 2); + xd->above_context = cm->above_context + mb_col + x_idx; + memcpy(&ta[n], xd->above_context, sizeof(ta[n])); + memcpy(&tl[n], xd->left_context, sizeof(tl[n])); + tp[n] = *t; + xd->mode_info_context = mi + x_idx + y_idx * mis; - if (!x->skip) { - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - 
vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + vp9_fidct_mb(x); + vp9_recon_mby_s_c(&x->e_mbd, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp9_recon_mbuv_s_c(&x->e_mbd, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; } else { int mb_skip_context = cpi->common.mb_no_coeff_skip ? (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0; xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; if (cpi->common.mb_no_coeff_skip) { // TODO(rbultje) this should be done per-sb instead of per-mb? if (output_enabled) cpi->skip_true_count[mb_skip_context]++; vp9_reset_mb_tokens_context(xd); } else { vp9_stuff_mb(cpi, xd, t, !output_enabled); // TODO(rbultje) this should be done per-sb instead of per-mb? if (output_enabled) cpi->skip_false_count[mb_skip_context]++; } } xd->mode_info_context = mi; - update_sb64_skip_coeff_state(cpi, x, ta, tl, tp, t, skip, output_enabled); + update_sb64_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled); if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && From d278d01836f72f8b9fb0f920a1057a43b74867ae Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Wed, 19 Dec 2012 11:34:49 -0800 Subject: [PATCH 49/77] minor loop filter refactoring and cleanup This commit did a couple of minor cleanups and refactorings to prepare for further loop filter experiments.
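 The core of the cleanup is the small skip predicate factored out below as mb_lf_skip(). Restated standalone (the enums here are illustrative stand-ins, not the real vp9 definitions): a macroblock may skip filtering of its interior edges only when it used a whole-macroblock prediction mode, a transform of at least 16x16, and coded no residual.

    typedef enum { DC_PRED, B_PRED, I8X8_PRED, SPLITMV } prediction_mode;
    typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 } transform_size;

    /* same logic as the diff's mb_lf_skip(), with scalars instead of a
     * MB_MODE_INFO pointer so it compiles in isolation */
    static int mb_lf_skip_sketch(prediction_mode mode, transform_size tx_size,
                                 int skip_coef) {
      return mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV &&
             tx_size >= TX_16X16 && skip_coef;
    }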
Change-Id: Id10c94dccd45f58e5310bacfdf6ee63cbb60b86f --- vp9/common/vp9_loopfilter.c | 201 +++++++++--------------------------- vp9/common/vp9_loopfilter.h | 9 +- vp9/decoder/vp9_onyxd_if.c | 2 +- vp9/encoder/vp9_onyx_if.c | 2 +- vp9/encoder/vp9_picklpf.c | 6 +- 5 files changed, 57 insertions(+), 163 deletions(-) diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 5188aa47c..bce090ee7 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -176,49 +176,52 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, } } -void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { +static int mb_lf_skip(const MB_MODE_INFO *const mbmi) { + const MB_PREDICTION_MODE mode = mbmi->mode; + const int skip_coef = mbmi->mb_skip_coeff; + const int tx_size = mbmi->txfm_size; + return mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV && + tx_size >= TX_16X16 && skip_coef; +} + +void vp9_loop_filter_frame(VP9_COMMON *cm, + MACROBLOCKD *xd, + int frame_filter_level, + int y_only) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; loop_filter_info_n *lfi_n = &cm->lf_info; struct loop_filter_info lfi; - - FRAME_TYPE frame_type = cm->frame_type; - - int mb_row; - int mb_col; - - int filter_level; - + const FRAME_TYPE frame_type = cm->frame_type; + int mb_row, mb_col; uint8_t *y_ptr, *u_ptr, *v_ptr; /* Point at base of Mb MODE_INFO list */ const MODE_INFO *mode_info_context = cm->mi; -#if CONFIG_SUPERBLOCKS const int mis = cm->mode_info_stride; -#endif /* Initialize the loop filter for this frame. */ - vp9_loop_filter_frame_init(cm, xd, cm->filter_level); - + vp9_loop_filter_frame_init(cm, xd, frame_filter_level); /* Set up the buffer pointers */ y_ptr = post->y_buffer; - u_ptr = post->u_buffer; - v_ptr = post->v_buffer; + if (y_only) { + u_ptr = 0; + v_ptr = 0; + } else { + u_ptr = post->u_buffer; + v_ptr = post->v_buffer; + } /* vp9_filter each macro block */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != I8X8_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; - int tx_type = mode_info_context->mbmi.txfm_size; - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + const int filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { + const int skip_lf = mb_lf_skip(&mode_info_context->mbmi); + const int tx_size = mode_info_context->mbmi.txfm_size; if (cm->filter_type == NORMAL_LOOPFILTER) { const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; @@ -229,19 +232,17 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { if (mb_col > 0 #if CONFIG_SUPERBLOCKS && !((mb_col & 1) && mode_info_context->mbmi.sb_type && - ((mode_info_context[0].mbmi.mb_skip_coeff && - mode_info_context[-1].mbmi.mb_skip_coeff) + ((skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) #if CONFIG_TX32X32 - || mode_info_context[-1].mbmi.txfm_size == TX_32X32 + || tx_size == TX_32X32 #endif - )) + )) #endif ) vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride,
post->uv_stride, &lfi); - - if (!skip_lf && tx_type < TX_16X16) { - if (tx_type == TX_8X8) + if (!skip_lf) { + if (tx_size >= TX_8X8) vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); else @@ -249,24 +250,21 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { post->uv_stride, &lfi); } - /* don't apply across umv border */ if (mb_row > 0 #if CONFIG_SUPERBLOCKS && !((mb_row & 1) && mode_info_context->mbmi.sb_type && - ((mode_info_context[0].mbmi.mb_skip_coeff && - mode_info_context[-mis].mbmi.mb_skip_coeff) + ((skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) #if CONFIG_TX32X32 - || mode_info_context[-mis].mbmi.txfm_size == TX_32X32 + || tx_size == TX_32X32 #endif )) #endif ) vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); - - if (!skip_lf && tx_type < TX_16X16) { - if (tx_type == TX_8X8) + if (!skip_lf) { + if (tx_size >= TX_8X8) vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); else @@ -275,151 +273,48 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { } } else { // FIXME: Not 8x8 aware - if (mb_col > 0 + if (mb_col > 0 && + !(skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) #if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.sb_type && - mode_info_context[0].mbmi.mb_skip_coeff && - mode_info_context[-1].mbmi.mb_skip_coeff) + && !((mb_col & 1) && mode_info_context->mbmi.sb_type) #endif ) vp9_loop_filter_simple_mbv(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - if (!skip_lf) vp9_loop_filter_simple_bv(y_ptr, post->y_stride, lfi_n->blim[filter_level]); /* don't apply across umv border */ - if (mb_row > 0 + if (mb_row > 0 && + !(skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) #if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.sb_type && - mode_info_context[0].mbmi.mb_skip_coeff && - mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff) + && !((mb_row & 1) && mode_info_context->mbmi.sb_type) #endif ) vp9_loop_filter_simple_mbh(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - if (!skip_lf) vp9_loop_filter_simple_bh(y_ptr, post->y_stride, lfi_n->blim[filter_level]); } } - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - + if (!y_only) { + u_ptr += 8; + v_ptr += 8; + } mode_info_context++; /* step to next MB */ } - y_ptr += post->y_stride * 16 - post->y_width; - u_ptr += post->uv_stride * 8 - post->uv_width; - v_ptr += post->uv_stride * 8 - post->uv_width; - + if (!y_only) { + u_ptr += post->uv_stride * 8 - post->uv_width; + v_ptr += post->uv_stride * 8 - post->uv_width; + } mode_info_context++; /* Skip border mb */ } } -void vp9_loop_filter_frame_yonly(VP9_COMMON *cm, MACROBLOCKD *xd, - int default_filt_lvl) { - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - - uint8_t *y_ptr; - int mb_row; - int mb_col; - - loop_filter_info_n *lfi_n = &cm->lf_info; - struct loop_filter_info lfi; - - int filter_level; - FRAME_TYPE frame_type = cm->frame_type; - - /* Point at base of Mb MODE_INFO list */ - const MODE_INFO *mode_info_context = cm->mi; - -#if 0 - if (default_filt_lvl == 0) /* no filter applied */ - return; -#endif - - /* Initialize the loop filter for this frame. 
*/ - vp9_loop_filter_frame_init(cm, xd, default_filt_lvl); - - /* Set up the buffer pointers */ - y_ptr = post->y_buffer; - - /* vp9_filter each macro block */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != I8X8_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - int tx_type = mode_info_context->mbmi.txfm_size; - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) { - if (cm->filter_type == NORMAL_LOOPFILTER) { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp9_loop_filter_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf && tx_type != TX_16X16) { - if (tx_type == TX_8X8) - vp9_loop_filter_bv8x8(y_ptr, 0, 0, post->y_stride, 0, &lfi); - else - vp9_loop_filter_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi); - } - - /* don't apply across umv border */ - if (mb_row > 0) - vp9_loop_filter_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf && tx_type != TX_16X16) { - if (tx_type == TX_8X8) - vp9_loop_filter_bh8x8(y_ptr, 0, 0, post->y_stride, 0, &lfi); - else - vp9_loop_filter_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi); - } - } else { - // FIXME: Not 8x8 aware - if (mb_col > 0) - vp9_loop_filter_simple_mbv(y_ptr, post->y_stride, - lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp9_loop_filter_simple_bv(y_ptr, post->y_stride, - lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp9_loop_filter_simple_mbh(y_ptr, post->y_stride, - lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp9_loop_filter_simple_bh(y_ptr, post->y_stride, - lfi_n->blim[filter_level]); - } - } - - y_ptr += 16; - mode_info_context++; /* step to next MB */ - } - - y_ptr += post->y_stride * 16 - post->y_width; - mode_info_context++; /* Skip border mb */ - } -} void vp9_loop_filter_partial_frame(VP9_COMMON *cm, MACROBLOCKD *xd, int default_filt_lvl) { diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index f05dabf08..dbe8e1f83 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -80,16 +80,15 @@ void vp9_loop_filter_frame_init(struct VP9Common *cm, struct macroblockd *mbd, int default_filt_lvl); -void vp9_loop_filter_frame(struct VP9Common *cm, struct macroblockd *mbd); +void vp9_loop_filter_frame(struct VP9Common *cm, + struct macroblockd *mbd, + int filter_level, + int y_only); void vp9_loop_filter_partial_frame(struct VP9Common *cm, struct macroblockd *mbd, int default_filt_lvl); -void vp9_loop_filter_frame_yonly(struct VP9Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl); diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index b6b686377..b3b75af70 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -382,7 +382,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size, if (cm->filter_level) { /* Apply the loop filter if appropriate. 
*/ - vp9_loop_filter_frame(cm, &pbi->mb); + vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0); } vp8_yv12_extend_frame_borders(cm->frame_to_show); } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 44f20adbe..14948a0de 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2820,7 +2820,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { if (cm->filter_level > 0) { vp9_set_alt_lf_level(cpi, cm->filter_level); - vp9_loop_filter_frame(cm, &cpi->mb.e_mbd); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0); } vp8_yv12_extend_frame_borders(cm->frame_to_show); diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 556851ce0..7091c4932 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -268,7 +268,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { // Get baseline error score vp9_set_alt_lf_level(cpi, filt_mid); - vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1); best_err = vp9_calc_ss_err(sd, cm->frame_to_show); filt_best = filt_mid; @@ -293,7 +293,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { if ((filt_direction <= 0) && (filt_low != filt_mid)) { // Get Low filter error score vp9_set_alt_lf_level(cpi, filt_low); - vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1); filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); @@ -313,7 +313,7 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi) { // Now look at filt_high if ((filt_direction >= 0) && (filt_high != filt_mid)) { vp9_set_alt_lf_level(cpi, filt_high); - vp9_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1); filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); From c14439c3d3db8dfa44a30c4edc50f56250ce4cd3 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 13 Dec 2012 23:53:11 -0800 Subject: [PATCH 50/77] reset segment map on key frame This fixes a decoder crash when the decoder skips a number of frames and then continues decoding from a later key frame.
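Both the decoder (init_frame) and the encoder (vp9_setup_key_frame) now apply the same reset, so temporal segment-id prediction can no longer read a stale map after a key frame. The shared pattern, as it appears in the diff below:

  /* drop the previous frame's segment map whenever a key frame is coded */
  if (cm->last_frame_seg_map)
    vpx_memset(cm->last_frame_seg_map, 0, cm->mb_rows * cm->mb_cols);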
Change-Id: I3ba116eba6c3440e0528a21f53745f694302e4ad --- vp9/decoder/vp9_decodframe.c | 9 +++++++-- vp9/encoder/vp9_ratectrl.c | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index d524ade66..36eadc482 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -40,7 +40,7 @@ #define COEFCOUNT_TESTING -// #define DEC_DEBUG +//#define DEC_DEBUG #ifdef DEC_DEBUG int dec_debug = 0; #endif @@ -1311,7 +1311,10 @@ static void init_frame(VP9D_COMP *pbi) { MACROBLOCKD *const xd = &pbi->mb; if (pc->frame_type == KEY_FRAME) { - /* Various keyframe initializations */ + + if (pc->last_frame_seg_map) + vpx_memset(pc->last_frame_seg_map, 0, (pc->mb_rows * pc->mb_cols)); + vp9_init_mv_probs(pc); vp9_init_mbmode_probs(pc); @@ -1354,6 +1357,7 @@ static void init_frame(VP9D_COMP *pbi) { vp9_update_mode_info_border(pc, pc->mip); vp9_update_mode_info_in_image(pc, pc->mi); + } else { if (!pc->use_bilinear_mc_filter) @@ -1600,6 +1604,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pc->ref_pred_probs[0] = 120; pc->ref_pred_probs[1] = 80; pc->ref_pred_probs[2] = 40; + } else { for (i = 0; i < PREDICTION_PROBS; i++) { if (vp9_read_bit(&header_bc)) diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 08ad54b51..ee87fba03 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -255,6 +255,9 @@ void vp9_setup_key_frame(VP9_COMP *cpi) { vp9_init_mbmode_probs(& cpi->common); vp9_default_bmode_probs(cm->fc.bmode_prob); + if(cm->last_frame_seg_map) + vpx_memset(cm->last_frame_seg_map, 0, (cm->mb_rows * cm->mb_cols)); + vp9_init_mv_probs(& cpi->common); // cpi->common.filter_level = 0; // Reset every key frame. From 8d568312a2e6882a336eb3525fbe6b9e752163f3 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Tue, 8 Jan 2013 10:44:19 -0800 Subject: [PATCH 51/77] vp9_sub_pixel_variance16x2 SSE2 optimization About 5% decoder speedup. 
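The new function computes the standard block variance over the 16x2 (32-pixel) strip used by the sub-pel reference-MV scoring in vp9_find_best_ref_mvs(). As a reminder of the math, the scalar tail of the SSE2 routine below reduces to:

  /* var = sse - sum^2 / N, with N = 16 * 2 = 32 pixels, hence the >> 5 */
  *sse = xxsum0;
  return xxsum0 - (((unsigned int)xsum0 * xsum0) >> 5);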
Change-Id: Ib6687d337af758a536a0e7e289f400990f1f9794 --- vp9/common/vp9_findnearmv.c | 38 ++++----- vp9/common/vp9_rtcd_defs.sh | 5 ++ vp9/common/x86/vp9_subpixel_variance_sse2.c | 90 +++++++++++++++++++++ vp9/vp9_common.mk | 3 + 4 files changed, 117 insertions(+), 19 deletions(-) create mode 100644 vp9/common/x86/vp9_subpixel_variance_sse2.c diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 85982fc18..54fd8d36f 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -185,33 +185,33 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, offset = ref_y_stride * row_offset + col_offset; score = 0; if (xd->up_available) { - vp9_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src, xd->dst.y_stride, &sse); + vp9_sub_pixel_variance16x2(above_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src, xd->dst.y_stride, &sse); score += sse; #if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { - vp9_sub_pixel_variance16x2_c(above_ref + offset + 16, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src + 16, xd->dst.y_stride, &sse); + vp9_sub_pixel_variance16x2(above_ref + offset + 16, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 16, xd->dst.y_stride, &sse); score += sse; } #if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { - vp9_sub_pixel_variance16x2_c(above_ref + offset + 32, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src + 32, xd->dst.y_stride, &sse); + vp9_sub_pixel_variance16x2(above_ref + offset + 32, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 32, xd->dst.y_stride, &sse); score += sse; - vp9_sub_pixel_variance16x2_c(above_ref + offset + 48, - ref_y_stride, - SP(this_mv.as_mv.col), - SP(this_mv.as_mv.row), - above_src + 48, xd->dst.y_stride, &sse); + vp9_sub_pixel_variance16x2(above_ref + offset + 48, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 48, xd->dst.y_stride, &sse); score += sse; } #endif diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index f02ee0260..e41c18ab0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -252,6 +252,11 @@ specialize vp9_sad16x3 sse2 prototype unsigned int vp9_sad3x16 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride" specialize vp9_sad3x16 sse2 +if [ "$CONFIG_SUBPELREFMV" = "yes" ]; then +prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" +specialize vp9_sub_pixel_variance16x2 sse2 +fi + # # Sub Pixel Filters # diff --git a/vp9/common/x86/vp9_subpixel_variance_sse2.c b/vp9/common/x86/vp9_subpixel_variance_sse2.c new file mode 100644 index 000000000..91cd75f22 --- /dev/null +++ b/vp9/common/x86/vp9_subpixel_variance_sse2.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#define HALFNDX 8 + +void vp9_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr, + int ref_pixels_per_line, + const unsigned char *src_ptr, + int src_pixels_per_line, + unsigned int Height, + int *sum, + unsigned int *sumsquared); + +void vp9_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr, + int ref_pixels_per_line, + const unsigned char *src_ptr, + int src_pixels_per_line, + unsigned int Height, + int *sum, + unsigned int *sumsquared); + +void vp9_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr, + int ref_pixels_per_line, + const unsigned char *src_ptr, + int src_pixels_per_line, + unsigned int Height, + int *sum, + unsigned int *sumsquared); + +void vp9_filter_block2d_bil_var_sse2(const unsigned char *ref_ptr, + int ref_pixels_per_line, + const unsigned char *src_ptr, + int src_pixels_per_line, + unsigned int Height, + int xoffset, + int yoffset, + int *sum, + unsigned int *sumsquared); + +unsigned int vp9_sub_pixel_variance16x2_sse2(const unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + if (xoffset == HALFNDX && yoffset == 0) { + vp9_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 2, + &xsum0, &xxsum0); + } else if (xoffset == 0 && yoffset == HALFNDX) { + vp9_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 2, + &xsum0, &xxsum0); + } else if (xoffset == HALFNDX && yoffset == HALFNDX) { + vp9_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 2, + &xsum0, &xxsum0); + } else { + vp9_filter_block2d_bil_var_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 2, + xoffset, yoffset, + &xsum0, &xxsum0); + + vp9_filter_block2d_bil_var_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 2, + xoffset, yoffset, + &xsum1, &xxsum1); + xsum0 += xsum1; + xxsum0 += xxsum1; + } + + *sse = xxsum0; + return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 5)); +} diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 4d17233e7..d84c65cf9 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -95,6 +95,9 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_wrapper_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm +ifeq ($(CONFIG_SUBPELREFMV),yes) +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_variance_sse2.c +endif VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_ssse3.asm ifeq ($(CONFIG_POSTPROC),yes) From 4455036cfc3c6b7fb9d7b85af1982e7df3711a05 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 8 Jan 2013 10:29:22 -0800 Subject: [PATCH 52/77] Merge superblocks (32x32) experiment. 
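Merging the experiment removes the CONFIG_SUPERBLOCKS guards, so the superblock code paths are now compiled unconditionally. The recurring pattern throughout this change is (illustrative, taken from the vp9_blockd.h hunks below):

  /* before: only built with the experiment enabled */
  #if CONFIG_SUPERBLOCKS
    if (xd->mode_info_context->mbmi.sb_type) return tx_type;
  #endif

  /* after: always built */
    if (xd->mode_info_context->mbmi.sb_type) return tx_type;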
Change-Id: I0df99742029834a85c4933652b0587cf5b6b2587 --- vp9/common/vp9_blockd.c | 4 +- vp9/common/vp9_blockd.h | 28 +--- vp9/common/vp9_default_coef_probs.h | 4 +- vp9/common/vp9_entropy.c | 12 +- vp9/common/vp9_entropy.h | 8 +- vp9/common/vp9_entropymode.c | 16 -- vp9/common/vp9_findnearmv.c | 8 - vp9/common/vp9_invtrans.c | 2 +- vp9/common/vp9_invtrans.h | 2 +- vp9/common/vp9_loopfilter.c | 42 ++---- vp9/common/vp9_mvref_common.c | 7 - vp9/common/vp9_onyxc_int.h | 22 +-- vp9/common/vp9_pred_common.c | 12 -- vp9/common/vp9_recon.c | 4 +- vp9/common/vp9_reconinter.c | 4 +- vp9/common/vp9_reconinter.h | 4 +- vp9/common/vp9_reconintra.c | 14 +- vp9/common/vp9_reconintra.h | 2 - vp9/common/vp9_rtcd_defs.sh | 36 ++++- vp9/decoder/vp9_decodemv.c | 48 ++---- vp9/decoder/vp9_decodframe.c | 101 +++++-------- vp9/decoder/vp9_dequantize.c | 8 +- vp9/decoder/vp9_dequantize.h | 2 - vp9/decoder/vp9_detokenize.c | 4 +- vp9/decoder/vp9_detokenize.h | 2 +- vp9/decoder/vp9_idct_blk.c | 8 - vp9/encoder/vp9_bitstream.c | 65 ++------- vp9/encoder/vp9_block.h | 17 +-- vp9/encoder/vp9_encodeframe.c | 134 +++++------------ vp9/encoder/vp9_encodemb.c | 4 +- vp9/encoder/vp9_encodemb.h | 4 +- vp9/encoder/vp9_firstpass.c | 5 - vp9/encoder/vp9_mcomp.c | 44 ------ vp9/encoder/vp9_onyx_if.c | 8 +- vp9/encoder/vp9_onyx_int.h | 14 +- vp9/encoder/vp9_quantize.c | 16 +- vp9/encoder/vp9_quantize.h | 2 +- vp9/encoder/vp9_ratectrl.c | 8 +- vp9/encoder/vp9_rdopt.c | 217 +++++++++++++--------------- vp9/encoder/vp9_segmentation.c | 11 +- vp9/encoder/vp9_tokenize.c | 28 ++-- vp9/encoder/vp9_tokenize.h | 6 +- vp9/encoder/vp9_variance_c.c | 25 ++-- 43 files changed, 343 insertions(+), 669 deletions(-) diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index b8ad433dc..c3fa07284 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -16,7 +16,7 @@ const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25] = { {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} #endif }; @@ -24,7 +24,7 @@ const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25] = { {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8}, {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} #endif }; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index bfc5b82e1..9f6e53356 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -122,7 +122,7 @@ typedef enum { TX_8X8 = 1, // 8x8 dct transform TX_16X16 = 2, // 16x16 dct transform TX_SIZE_MAX_MB = 3, // Number of different transforms available -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform TX_SIZE_MAX_SB, // Number of transforms available to SBs #else @@ -226,7 +226,6 @@ typedef enum { MAX_REF_FRAMES = 4 } MV_REFERENCE_FRAME; -#if CONFIG_SUPERBLOCKS typedef enum { BLOCK_SIZE_MB16X16 = 0, BLOCK_SIZE_SB32X32 = 1, @@ -234,7 +233,6 @@ typedef enum { BLOCK_SIZE_SB64X64 = 2, #endif } BLOCK_SIZE_TYPE; -#endif typedef struct { MB_PREDICTION_MODE mode, 
uv_mode; @@ -274,13 +272,9 @@ typedef struct { // Flag to turn prediction signal filter on(1)/off(0 ) at the MB level unsigned int pred_filter_enabled; #endif - INTERPOLATIONFILTERTYPE interp_filter; + INTERPOLATIONFILTERTYPE interp_filter; -#if CONFIG_SUPERBLOCKS - // FIXME need a SB array of 4 MB_MODE_INFOs that - // only needs one sb_type. BLOCK_SIZE_TYPE sb_type; -#endif } MB_MODE_INFO; typedef struct { @@ -310,7 +304,7 @@ typedef struct blockd { union b_mode_info bmi; } BLOCKD; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 typedef struct superblockd { /* 32x32 Y and 16x16 U/V. No 2nd order transform yet. */ DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]); @@ -326,7 +320,7 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, dqcoeff[400]); DECLARE_ALIGNED(16, uint16_t, eobs[25]); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 SUPERBLOCKD sb_coeff_data; #endif @@ -417,14 +411,6 @@ typedef struct macroblockd { int corrupted; -#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) - /* This is an intermediate buffer currently used in sub-pixel motion search - * to keep a copy of the reference area. This buffer can be used for other - * purpose. - */ - DECLARE_ALIGNED(32, uint8_t, y_buf[22 * 32]); -#endif - int sb_index; int mb_index; // Index of the MB in the SB (0..3) int q_index; @@ -528,11 +514,9 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; -#if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks if (xd->mode_info_context->mbmi.sb_type) return tx_type; -#endif if (xd->mode_info_context->mbmi.mode == B_PRED && xd->q_index < ACTIVE_HT) { tx_type = txfm_map( @@ -585,11 +569,9 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; -#if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks if (xd->mode_info_context->mbmi.sb_type) return tx_type; -#endif if (xd->mode_info_context->mbmi.mode == I8X8_PRED && xd->q_index < ACTIVE_HT8) { // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged @@ -620,11 +602,9 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; -#if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks if (xd->mode_info_context->mbmi.sb_type) return tx_type; -#endif if (xd->mode_info_context->mbmi.mode < I8X8_PRED && xd->q_index < ACTIVE_HT16) { tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 6968dcd76..0c9131db2 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -1038,7 +1038,7 @@ static const vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] } } }; -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { { /* block Type 0 */ { /* Coeff Band 0 */ @@ -1210,4 +1210,4 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { } } }; -#endif // CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#endif // CONFIG_TX32X32 diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 1402c084e..cdc8bc14e 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -142,7 +142,7 @@ DECLARE_ALIGNED(16, const 
int, vp9_default_zig_zag1d_16x16[256]) = { 237, 252, 253, 238, 223, 239, 254, 255, }; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 #if CONFIG_DWT32X32HYBRID DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, @@ -547,7 +547,7 @@ DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); #endif @@ -628,7 +628,7 @@ void vp9_init_neighbors() { vp9_default_zig_zag1d_8x8_neighbors); init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, vp9_default_zig_zag1d_16x16_neighbors); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, vp9_default_zig_zag1d_32x32_neighbors); #endif @@ -645,7 +645,7 @@ const int *vp9_get_coef_neighbors_handle(const int *scan) { return vp9_default_zig_zag1d_8x8_neighbors; } else if (scan == vp9_default_zig_zag1d_16x16) { return vp9_default_zig_zag1d_16x16_neighbors; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 } else if (scan == vp9_default_zig_zag1d_32x32) { return vp9_default_zig_zag1d_32x32_neighbors; #endif @@ -693,7 +693,7 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, default_hybrid_coef_probs_16x16, sizeof(pc->fc.hybrid_coef_probs_16x16)); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32, sizeof(pc->fc.coef_probs_32x32)); #endif @@ -840,7 +840,7 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { cm->fc.pre_hybrid_coef_probs_16x16, BLOCK_TYPES_16X16, cm->fc.hybrid_coef_counts_16x16, count_sat, update_factor); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32, count_sat, update_factor); diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 99826015a..cc65ffc0a 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -66,7 +66,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ #define BLOCK_TYPES_16X16 4 -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 #define BLOCK_TYPES_32X32 4 #endif @@ -77,7 +77,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]); #endif @@ -122,7 +122,7 @@ extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); #endif @@ -154,7 +154,7 @@ extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[ 64 * MAX_NEIGHBORS]); extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[ 256 * MAX_NEIGHBORS]); -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 extern DECLARE_ALIGNED(16, 
int, vp9_default_zig_zag1d_32x32_neighbors[ 1024 * MAX_NEIGHBORS]); #endif diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index d7c2d8e06..d7c7a3f66 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -272,13 +272,11 @@ const vp9_tree_index vp9_mv_ref_tree[8] = { -NEWMV, -SPLITMV }; -#if CONFIG_SUPERBLOCKS const vp9_tree_index vp9_sb_mv_ref_tree[6] = { -ZEROMV, 2, -NEARESTMV, 4, -NEARMV, -NEWMV }; -#endif const vp9_tree_index vp9_sub_mv_ref_tree[6] = { -LEFT4X4, 2, @@ -289,19 +287,15 @@ const vp9_tree_index vp9_sub_mv_ref_tree[6] = { struct vp9_token_struct vp9_bmode_encodings[VP9_NKF_BINTRAMODES]; struct vp9_token_struct vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES]; struct vp9_token_struct vp9_ymode_encodings[VP9_YMODES]; -#if CONFIG_SUPERBLOCKS struct vp9_token_struct vp9_sb_ymode_encodings[VP9_I32X32_MODES]; struct vp9_token_struct vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES]; -#endif struct vp9_token_struct vp9_kf_ymode_encodings[VP9_YMODES]; struct vp9_token_struct vp9_uv_mode_encodings[VP9_UV_MODES]; struct vp9_token_struct vp9_i8x8_mode_encodings[VP9_I8X8_MODES]; struct vp9_token_struct vp9_mbsplit_encodings[VP9_NUMMBSPLITS]; struct vp9_token_struct vp9_mv_ref_encoding_array[VP9_MVREFS]; -#if CONFIG_SUPERBLOCKS struct vp9_token_struct vp9_sb_mv_ref_encoding_array[VP9_MVREFS]; -#endif struct vp9_token_struct vp9_sub_mv_ref_encoding_array[VP9_SUBMVREFS]; void vp9_init_mbmode_probs(VP9_COMMON *x) { @@ -310,24 +304,20 @@ void vp9_init_mbmode_probs(VP9_COMMON *x) { vp9_tree_probs_from_distribution(VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, x->fc.ymode_prob, bct, y_mode_cts); -#if CONFIG_SUPERBLOCKS vp9_tree_probs_from_distribution(VP9_I32X32_MODES, vp9_sb_ymode_encodings, vp9_sb_ymode_tree, x->fc.sb_ymode_prob, bct, y_mode_cts); -#endif { int i; for (i = 0; i < 8; i++) { vp9_tree_probs_from_distribution(VP9_YMODES, vp9_kf_ymode_encodings, vp9_kf_ymode_tree, x->kf_ymode_prob[i], bct, kf_y_mode_cts[i]); -#if CONFIG_SUPERBLOCKS vp9_tree_probs_from_distribution(VP9_I32X32_MODES, vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree, x->sb_kf_ymode_prob[i], bct, kf_y_mode_cts[i]); -#endif } } { @@ -426,10 +416,8 @@ void vp9_entropy_mode_init() { vp9_tokens_from_tree(vp9_bmode_encodings, vp9_bmode_tree); vp9_tokens_from_tree(vp9_ymode_encodings, vp9_ymode_tree); vp9_tokens_from_tree(vp9_kf_ymode_encodings, vp9_kf_ymode_tree); -#if CONFIG_SUPERBLOCKS vp9_tokens_from_tree(vp9_sb_ymode_encodings, vp9_sb_ymode_tree); vp9_tokens_from_tree(vp9_sb_kf_ymode_encodings, vp9_sb_kf_ymode_tree); -#endif vp9_tokens_from_tree(vp9_uv_mode_encodings, vp9_uv_mode_tree); vp9_tokens_from_tree(vp9_i8x8_mode_encodings, vp9_i8x8_mode_tree); vp9_tokens_from_tree(vp9_mbsplit_encodings, vp9_mbsplit_tree); @@ -438,10 +426,8 @@ void vp9_entropy_mode_init() { vp9_tokens_from_tree_offset(vp9_mv_ref_encoding_array, vp9_mv_ref_tree, NEARESTMV); -#if CONFIG_SUPERBLOCKS vp9_tokens_from_tree_offset(vp9_sb_mv_ref_encoding_array, vp9_sb_mv_ref_tree, NEARESTMV); -#endif vp9_tokens_from_tree_offset(vp9_sub_mv_ref_encoding_array, vp9_sub_mv_ref_tree, LEFT4X4); } @@ -599,11 +585,9 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { update_mode_probs(VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree, cm->fc.ymode_counts, cm->fc.pre_ymode_prob, cm->fc.ymode_prob); -#if CONFIG_SUPERBLOCKS update_mode_probs(VP9_I32X32_MODES, vp9_sb_ymode_encodings, vp9_sb_ymode_tree, cm->fc.sb_ymode_counts, cm->fc.pre_sb_ymode_prob, cm->fc.sb_ymode_prob); -#endif for (i = 0; i < VP9_YMODES; ++i) { 
update_mode_probs(VP9_UV_MODES, vp9_uv_mode_encodings, vp9_uv_mode_tree, cm->fc.uv_mode_counts[i], cm->fc.pre_uv_mode_prob[i], diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 85982fc18..30e721e07 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -190,7 +190,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, SP(this_mv.as_mv.row), above_src, xd->dst.y_stride, &sse); score += sse; -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { vp9_sub_pixel_variance16x2_c(above_ref + offset + 16, ref_y_stride, @@ -214,7 +213,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src + 48, xd->dst.y_stride, &sse); score += sse; } -#endif #endif } if (xd->left_available) { @@ -223,7 +221,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, SP(this_mv.as_mv.row), left_src, xd->dst.y_stride, &sse); score += sse; -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16, ref_y_stride, @@ -250,7 +247,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, xd->dst.y_stride, &sse); score += sse; } -#endif #endif } #else @@ -263,7 +259,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, if (xd->up_available) { score += vp9_sad16x3(above_src, xd->dst.y_stride, above_ref + offset, ref_y_stride); -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { score += vp9_sad16x3(above_src + 16, xd->dst.y_stride, above_ref + offset + 16, ref_y_stride); @@ -275,13 +270,11 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += vp9_sad16x3(above_src + 48, xd->dst.y_stride, above_ref + offset + 48, ref_y_stride); } -#endif #endif } if (xd->left_available) { score += vp9_sad3x16(left_src, xd->dst.y_stride, left_ref + offset, ref_y_stride); -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { score += vp9_sad3x16(left_src + xd->dst.y_stride * 16, xd->dst.y_stride, @@ -299,7 +292,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, left_ref + offset + ref_y_stride * 48, ref_y_stride); } -#endif #endif } #endif diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index 60deb5260..a09c0a80a 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -146,7 +146,7 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { vp9_inverse_transform_mbuv_8x8(xd); } -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) { vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64); } diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index 3cfb45fed..1f0164bb1 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -39,7 +39,7 @@ extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); #endif diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index bce090ee7..d91f98ebf 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -229,16 +229,13 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; - if (mb_col > 0 -#if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.sb_type && - ((skip_lf && 
mb_lf_skip(&mode_info_context[-1].mbmi)) + if (mb_col > 0 && + !((mb_col & 1) && mode_info_context->mbmi.sb_type && + ((skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) #if CONFIG_TX32X32 - || tx_size == TX_32X32 + || tx_size == TX_32X32 #endif - )) -#endif - ) + ))) vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); if (!skip_lf) { @@ -251,16 +248,13 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, } /* don't apply across umv border */ - if (mb_row > 0 -#if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.sb_type && - ((skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) + if (mb_row > 0 && + !((mb_row & 1) && mode_info_context->mbmi.sb_type && + ((skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) #if CONFIG_TX32X32 - || tx_size == TX_32X32 + || tx_size == TX_32X32 #endif - )) -#endif - ) + ))) vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); if (!skip_lf) { @@ -273,12 +267,9 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, } } else { // FIXME: Not 8x8 aware - if (mb_col > 0 && - !(skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) -#if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.sb_type) -#endif - ) + if (mb_col > 0 && + !(skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi)) && + !((mb_col & 1) && mode_info_context->mbmi.sb_type)) vp9_loop_filter_simple_mbv(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) @@ -287,11 +278,8 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, /* don't apply across umv border */ if (mb_row > 0 && - !(skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) -#if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.sb_type) -#endif - ) + !(skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi)) && + !((mb_row & 1) && mode_info_context->mbmi.sb_type)) vp9_loop_filter_simple_mbh(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index bfdc1af32..786b02188 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -17,14 +17,12 @@ static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = { }; static int mb_ref_distance_weight[MVREF_NEIGHBOURS] = { 3, 3, 2, 1, 1, 1, 1, 1 }; -#if CONFIG_SUPERBLOCKS static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = { {0, -1}, {-1, 0}, {1, -1}, {-1, 1}, {-1, -1}, {0, -2}, {-2, 0}, {-1, -2} }; static int sb_ref_distance_weight[MVREF_NEIGHBOURS] = { 3, 3, 2, 2, 2, 1, 1, 1 }; -#endif // clamp_mv #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units @@ -236,7 +234,6 @@ void vp9_find_mv_refs( vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES); vpx_memset(candidate_scores, 0, sizeof(candidate_scores)); -#if CONFIG_SUPERBLOCKS if (mbmi->sb_type) { mv_ref_search = sb_mv_ref_search; ref_distance_weight = sb_ref_distance_weight; @@ -244,10 +241,6 @@ void vp9_find_mv_refs( mv_ref_search = mb_mv_ref_search; ref_distance_weight = mb_ref_distance_weight; } -#else - mv_ref_search = mb_mv_ref_search; - ref_distance_weight = mb_ref_distance_weight; -#endif // We first scan for candidate vectors that match the current reference frame // Look at nearest neigbours diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 3b62dac81..bdff48bf3 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -44,9 +44,7 @@ void vp9_initialize_common(void); typedef struct frame_contexts { vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1]; vp9_prob ymode_prob[VP9_YMODES - 1]; /* 
interframe intra mode probs */ -#if CONFIG_SUPERBLOCKS vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; -#endif vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; @@ -57,7 +55,7 @@ typedef struct frame_contexts { vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; #endif @@ -65,18 +63,14 @@ typedef struct frame_contexts { nmv_context pre_nmvc; vp9_prob pre_bmode_prob[VP9_NKF_BINTRAMODES - 1]; vp9_prob pre_ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ -#if CONFIG_SUPERBLOCKS vp9_prob pre_sb_ymode_prob[VP9_I32X32_MODES - 1]; -#endif vp9_prob pre_uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; vp9_prob pre_i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob pre_sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob pre_mbsplit_prob[VP9_NUMMBSPLITS - 1]; unsigned int bmode_counts[VP9_NKF_BINTRAMODES]; unsigned int ymode_counts[VP9_YMODES]; /* interframe intra mode probs */ -#if CONFIG_SUPERBLOCKS unsigned int sb_ymode_counts[VP9_I32X32_MODES]; -#endif unsigned int uv_mode_counts[VP9_YMODES][VP9_UV_MODES]; unsigned int i8x8_mode_counts[VP9_I8X8_MODES]; /* interframe intra probs */ unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS]; @@ -88,7 +82,7 @@ typedef struct frame_contexts { vp9_coeff_probs pre_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_probs pre_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32]; #endif @@ -98,7 +92,7 @@ typedef struct frame_contexts { vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16]; vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; #endif @@ -131,11 +125,11 @@ typedef enum { ONLY_4X4 = 0, ALLOW_8X8 = 1, ALLOW_16X16 = 2, -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 ALLOW_32X32 = 3, #endif - TX_MODE_SELECT = 3 + (CONFIG_TX32X32 && CONFIG_SUPERBLOCKS), - NB_TXFM_MODES = 4 + (CONFIG_TX32X32 && CONFIG_SUPERBLOCKS), + TX_MODE_SELECT = 3 + CONFIG_TX32X32, + NB_TXFM_MODES = 4 + CONFIG_TX32X32, } TXFM_MODE; typedef struct VP9Common { @@ -237,9 +231,7 @@ typedef struct VP9Common { [VP9_KF_BINTRAMODES] [VP9_KF_BINTRAMODES - 1]; vp9_prob kf_ymode_prob[8][VP9_YMODES - 1]; /* keyframe "" */ -#if CONFIG_SUPERBLOCKS vp9_prob sb_kf_ymode_prob[8][VP9_I32X32_MODES - 1]; -#endif int kf_ymode_probs_index; int kf_ymode_probs_update; vp9_prob kf_uv_mode_prob[VP9_YMODES] [VP9_UV_MODES - 1]; @@ -247,12 +239,10 @@ typedef struct VP9Common { vp9_prob prob_intra_coded; vp9_prob prob_last_coded; vp9_prob prob_gf_coded; -#if CONFIG_SUPERBLOCKS vp9_prob sb32_coded; #if CONFIG_SUPERBLOCKS64 vp9_prob sb64_coded; #endif // CONFIG_SUPERBLOCKS64 -#endif // Context probabilities when using predictive coding of segment id vp9_prob segment_pred_probs[PREDICTION_PROBS]; diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index f2f35a3b4..76ae0b36b 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -223,14 +223,11 @@ unsigned char 
vp9_get_pred_flag(const MACROBLOCKD *const xd, void vp9_set_pred_flag(MACROBLOCKD *const xd, PRED_ID pred_id, unsigned char pred_flag) { -#if CONFIG_SUPERBLOCKS const int mis = xd->mode_info_stride; -#endif switch (pred_id) { case PRED_SEG_ID: xd->mode_info_context->mbmi.seg_id_predicted = pred_flag; -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type) { #define sub(a, b) (b) < 0 ? (a) + (b) : (a) const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; @@ -245,12 +242,10 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, } } } -#endif break; case PRED_REF: xd->mode_info_context->mbmi.ref_predicted = pred_flag; -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type) { const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); @@ -263,12 +258,10 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, } } } -#endif break; case PRED_MBSKIP: xd->mode_info_context->mbmi.mb_skip_coeff = pred_flag; -#if CONFIG_SUPERBLOCKS if (xd->mode_info_context->mbmi.sb_type) { const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); @@ -281,7 +274,6 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, } } } -#endif break; default: @@ -299,11 +291,8 @@ unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm, const MACROBLOCKD *const xd, int MbIndex) { // Currently the prediction for the macroblock segment ID is // the value stored for this macroblock in the previous frame. -#if CONFIG_SUPERBLOCKS if (!xd->mode_info_context->mbmi.sb_type) { -#endif return cm->last_frame_seg_map[MbIndex]; -#if CONFIG_SUPERBLOCKS } else { const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; const int mb_col = MbIndex % cm->mb_cols; @@ -321,7 +310,6 @@ unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm, return seg_id; } -#endif } MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm, diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index 00b537565..5c3f565a5 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -81,7 +81,6 @@ void vp9_recon2b_c(uint8_t *pred_ptr, } } -#if CONFIG_SUPERBLOCKS void vp9_recon_mby_s_c(MACROBLOCKD *xd, uint8_t *dst) { int x, y; BLOCKD *b = &xd->block[0]; @@ -146,8 +145,7 @@ void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { vdiff += 16; } } -#endif -#endif +#endif // CONFIG_TX32X32 void vp9_recon_mby_c(MACROBLOCKD *xd) { int i; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 8a1b93df0..7a7283183 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -711,7 +711,6 @@ void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd, vp9_build_1st_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride); } -#if CONFIG_SUPERBLOCKS void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, @@ -781,6 +780,7 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, #endif } +#if CONFIG_SUPERBLOCKS64 void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, @@ -844,7 +844,7 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, } #endif } -#endif +#endif // CONFIG_SUPERBLOCKS64 /* * The following functions should be called after an initial diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 5e45b6879..12b4b2514 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -47,7 +47,6 @@ extern void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD 
*xd, int dst_ystride, int dst_uvstride); -#if CONFIG_SUPERBLOCKS extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, @@ -55,13 +54,14 @@ extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, int dst_ystride, int dst_uvstride); +#if CONFIG_SUPERBLOCKS64 extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, int dst_uvstride); -#endif +#endif // CONFIG_SUPERBLOCKS64 extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 0f58f9862..0504edaeb 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -658,7 +658,6 @@ void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, vpred, uvstride, vintrapredictor, 8, 8); } -#if CONFIG_SUPERBLOCKS void vp9_build_interintra_32x32_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, int ystride) { @@ -704,6 +703,7 @@ void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride); } +#if CONFIG_SUPERBLOCKS64 void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, int ystride) { @@ -744,8 +744,8 @@ void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, vp9_build_interintra_64x64_predictors_sby(xd, ypred, ystride); vp9_build_interintra_64x64_predictors_sbuv(xd, upred, vpred, uvstride); } -#endif -#endif +#endif // CONFIG_SUPERBLOCKS64 +#endif // CONFIG_COMP_INTERINTRA_PRED void vp9_build_intra_predictors_mby(MACROBLOCKD *xd) { vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, @@ -761,7 +761,6 @@ void vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd) { xd->up_available, xd->left_available); } -#if CONFIG_SUPERBLOCKS void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, @@ -769,13 +768,14 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { xd->up_available, xd->left_available); } +#if CONFIG_SUPERBLOCKS64 void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) { vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, xd->mode_info_context->mbmi.mode, 64, xd->up_available, xd->left_available); } -#endif +#endif // CONFIG_SUPERBLOCKS64 #if CONFIG_COMP_INTRA_PRED void vp9_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { @@ -827,7 +827,6 @@ void vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd) { 8); } -#if CONFIG_SUPERBLOCKS void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, @@ -835,13 +834,14 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { 16); } +#if CONFIG_SUPERBLOCKS64 void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->mode_info_context->mbmi.uv_mode, 32); } -#endif +#endif // CONFIG_SUPERBLOCKS64 #if CONFIG_COMP_INTRA_PRED void vp9_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index 7bdcb4ece..88584ad3b 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -34,7 +34,6 @@ extern void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd, int uvstride); #endif // CONFIG_COMP_INTERINTRA_PRED -#if 
CONFIG_SUPERBLOCKS
 extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
                                                      uint8_t *ypred,
                                                      uint8_t *upred,
@@ -47,6 +46,5 @@ extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
                                                      uint8_t *vpred,
                                                      int ystride,
                                                      int uvstride);
-#endif  // CONFIG_SUPERBLOCKS
 
 #endif  // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 95253ef67..2dbfa72de 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -77,6 +77,16 @@ specialize vp9_dequant_idct_add_y_block
 prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs"
 specialize vp9_dequant_idct_add_uv_block
 
+if [ "$CONFIG_TX32X32" = "yes" ]; then
+
+prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob"
+specialize vp9_dequant_idct_add_32x32
+
+prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs"
+specialize vp9_dequant_idct_add_uv_block_16x16
+
+fi
+
 #
 # RECON
 #
@@ -125,6 +135,16 @@ specialize vp9_recon_mby_s
 prototype void vp9_recon_mbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
 specialize void vp9_recon_mbuv_s
 
+if [ "$CONFIG_TX32X32" = "yes" ]; then
+
+prototype void vp9_recon_sby_s "struct macroblockd *x, uint8_t *dst"
+specialize vp9_recon_sby_s
+
+prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
+specialize void vp9_recon_sbuv_s
+
+fi
+
 prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
 specialize vp9_build_intra_predictors_mby_s
 
@@ -152,6 +172,16 @@ specialize vp9_build_intra_predictors_mbuv_s;
 prototype void vp9_build_comp_intra_predictors_mbuv "struct macroblockd *x"
 specialize vp9_build_comp_intra_predictors_mbuv;
 
+if [ "$CONFIG_SUPERBLOCKS64" = "yes" ]; then
+
+prototype void vp9_build_intra_predictors_sb64y_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_sb64y_s;
+
+prototype void vp9_build_intra_predictors_sb64uv_s "struct macroblockd *x"
+specialize vp9_build_intra_predictors_sb64uv_s;
+
+fi
+
 prototype void vp9_intra4x4_predict "struct blockd *x, int b_mode, uint8_t *predictor"
 specialize vp9_intra4x4_predict;
 
@@ -396,18 +426,12 @@ prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, int16_t *out"
 prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out"
 fi
 
-
-
-if [ "$CONFIG_SUPERBLOCKS" = "yes" ]; then
-
 prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad"
 specialize vp9_sad32x3
 
 prototype unsigned int vp9_sad3x32 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad"
 specialize vp9_sad3x32
 
-fi
-
 #
 # Encoder functions below this point.
 #
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index bbe2e953c..78108a55d 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -51,7 +51,6 @@ static int read_ymode(vp9_reader *bc, const vp9_prob *p) {
   return treed_read(bc, vp9_ymode_tree, p);
 }
 
-#if CONFIG_SUPERBLOCKS
 static int read_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
   return treed_read(bc, vp9_sb_ymode_tree, p);
 }
@@ -59,7 +58,6 @@ static int read_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
 static int read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
   return treed_read(bc, vp9_uv_mode_tree, p);
 }
-#endif
 
 static int read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) {
   return treed_read(bc, vp9_kf_ymode_tree, p);
@@ -122,7 +120,6 @@ static void kfread_modes(VP9D_COMP *pbi,
       m->mbmi.segment_id = 0;
       if (pbi->mb.update_mb_segmentation_map) {
         read_mb_segid(bc, &m->mbmi, &pbi->mb);
-#if CONFIG_SUPERBLOCKS
         if (m->mbmi.sb_type) {
           const int nmbs = 1 << m->mbmi.sb_type;
           const int ymbs = MIN(cm->mb_rows - mb_row, nmbs);
@@ -135,9 +132,7 @@ static void kfread_modes(VP9D_COMP *pbi,
                 m->mbmi.segment_id;
           }
         }
-      } else
-#endif
-      {
+      } else {
         cm->last_frame_seg_map[map_index] = m->mbmi.segment_id;
       }
     }
@@ -161,14 +156,13 @@ static void kfread_modes(VP9D_COMP *pbi,
     m->mbmi.mb_skip_coeff = 0;
   }
 
-#if CONFIG_SUPERBLOCKS
   if (m->mbmi.sb_type) {
     y_mode = (MB_PREDICTION_MODE) read_kf_sb_ymode(bc,
         pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
-  } else
-#endif
-  y_mode = (MB_PREDICTION_MODE) read_kf_mb_ymode(bc,
-      pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
+  } else {
+    y_mode = (MB_PREDICTION_MODE) read_kf_mb_ymode(bc,
+        pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
+  }
 #if CONFIG_COMP_INTRA_PRED
   m->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
 #endif
@@ -228,12 +222,12 @@ static void kfread_modes(VP9D_COMP *pbi,
       m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]);
       if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) {
         m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
         if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type)
           m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]);
 #endif
       }
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.sb_type) {
       m->mbmi.txfm_size = TX_32X32;
 #endif
@@ -504,11 +498,9 @@ static MV_REFERENCE_FRAME read_ref_frame(VP9D_COMP *pbi,
   return (MV_REFERENCE_FRAME)ref_frame;
 }
 
-#if CONFIG_SUPERBLOCKS
 static MB_PREDICTION_MODE read_sb_mv_ref(vp9_reader *bc, const vp9_prob *p) {
   return (MB_PREDICTION_MODE) treed_read(bc, vp9_sb_mv_ref_tree, p);
 }
-#endif
 
 static MB_PREDICTION_MODE read_mv_ref(vp9_reader *bc, const vp9_prob *p) {
   return (MB_PREDICTION_MODE) treed_read(bc, vp9_mv_ref_tree, p);
@@ -598,7 +590,6 @@ static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) {
     } while (++i < VP9_YMODES - 1);
   }
 
-#if CONFIG_SUPERBLOCKS
   if (vp9_read_bit(bc)) {
     int i = 0;
 
@@ -606,7 +597,6 @@ static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) {
       cm->fc.sb_ymode_prob[i] = (vp9_prob) vp9_read_literal(bc, 8);
     } while (++i < VP9_I32X32_MODES - 1);
   }
-#endif
 
   read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv);
 }
@@ -654,7 +644,6 @@ static void read_mb_segment_id(VP9D_COMP *pbi,
       else {
         read_mb_segid(bc, mbmi, xd);
       }
-#if CONFIG_SUPERBLOCKS
       if (mbmi->sb_type) {
         const int nmbs = 1 << mbmi->sb_type;
         const int ymbs = MIN(cm->mb_rows - mb_row, nmbs);
@@ -667,13 +656,10 @@ static void read_mb_segment_id(VP9D_COMP *pbi,
                 mbmi->segment_id;
           }
         }
-      } else
-#endif
-      {
+      } else {
         cm->last_frame_seg_map[index] = mbmi->segment_id;
       }
     } else {
-#if CONFIG_SUPERBLOCKS
       if (mbmi->sb_type) {
         const int nmbs = 1 << mbmi->sb_type;
         const int ymbs = MIN(cm->mb_rows - mb_row, nmbs);
@@ -689,9 +675,7 @@ static void read_mb_segment_id(VP9D_COMP *pbi,
           }
         }
         mbmi->segment_id = segment_id;
-      } else
-#endif
-      {
+      } else {
         mbmi->segment_id = cm->last_frame_seg_map[index];
       }
     }
@@ -716,11 +700,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
   int mb_to_right_edge;
   int mb_to_top_edge;
   int mb_to_bottom_edge;
-#if CONFIG_SUPERBLOCKS
   const int mb_size = 1 << mi->mbmi.sb_type;
-#else
-  const int mb_size = 1;
-#endif
 
   mb_to_top_edge = xd->mb_to_top_edge;
   mb_to_bottom_edge = xd->mb_to_bottom_edge;
@@ -818,12 +798,10 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
       mbmi->mode =
         vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE);
     } else {
-#if CONFIG_SUPERBLOCKS
       if (mbmi->sb_type)
         mbmi->mode = read_sb_mv_ref(bc, mv_ref_p);
       else
-#endif
-      mbmi->mode = read_mv_ref(bc, mv_ref_p);
+        mbmi->mode = read_mv_ref(bc, mv_ref_p);
 
       vp9_accum_mv_refs(&pbi->common, mbmi->mode,
                         mbmi->mb_mode_context[ref_frame]);
@@ -1172,12 +1150,10 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) {
       mbmi->mode = (MB_PREDICTION_MODE)
                    vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE);
-#if CONFIG_SUPERBLOCKS
     } else if (mbmi->sb_type) {
       mbmi->mode = (MB_PREDICTION_MODE)
                    read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob);
       pbi->common.fc.sb_ymode_counts[mbmi->mode]++;
-#endif
     } else {
       mbmi->mode = (MB_PREDICTION_MODE)
                    read_ymode(bc, pbi->common.fc.ymode_prob);
@@ -1249,12 +1225,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
       if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED &&
           mbmi->mode != SPLITMV) {
         mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
         if (mbmi->sb_type && mbmi->txfm_size != TX_8X8)
           mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]);
 #endif
       }
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     } else if (mbmi->sb_type && cm->txfm_mode >= ALLOW_32X32) {
       mbmi->txfm_size = TX_32X32;
 #endif
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 812bf10fc..47d23ecbd 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -172,7 +172,6 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
  */
 static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
-#if CONFIG_SUPERBLOCKS
 #if CONFIG_SUPERBLOCKS64
     if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
       vp9_build_intra_predictors_sb64uv_s(xd);
@@ -182,14 +181,11 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
     if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
       vp9_build_intra_predictors_sbuv_s(xd);
       vp9_build_intra_predictors_sby_s(xd);
-    } else
-#endif  // CONFIG_SUPERBLOCKS
-    {
+    } else {
       vp9_build_intra_predictors_mbuv_s(xd);
       vp9_build_intra_predictors_mby_s(xd);
     }
   } else {
-#if CONFIG_SUPERBLOCKS
 #if CONFIG_SUPERBLOCKS64
     if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
       vp9_build_inter64x64_predictors_sb(xd,
@@ -207,9 +203,7 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
                                          xd->dst.v_buffer,
                                          xd->dst.y_stride,
                                          xd->dst.uv_stride);
-    } else
-#endif  // CONFIG_SUPERBLOCKS
-    {
+    } else {
       vp9_build_1st_inter16x16_predictors_mb(xd,
                                              xd->dst.y_buffer,
                                              xd->dst.u_buffer,
@@ -562,7 +556,6 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
   }
 }
 
-#if CONFIG_SUPERBLOCKS
 static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                             BOOL_DECODER* const bc, int n,
                             int maska, int shiftb) {
@@ -910,46 +903,43 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
                            xd->dst.u_buffer, xd->dst.v_buffer,
                            xd->dst.uv_stride, xd->eobs + 16);
     }
-  } else {
+  } else
 #endif
-    for (n = 0; n < 4; n++) {
-      int x_idx = n & 1, y_idx = n >> 1;
+  {
+    for (n = 0; n < 4; n++) {
+      int x_idx = n & 1, y_idx = n >> 1;
 
-      if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
-        continue;
+      if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
+        continue;
+      xd->above_context = pc->above_context + mb_col + x_idx;
+      xd->left_context = pc->left_context + y_idx + (mb_row & 2);
+      xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
+      for (i = 0; i < 25; i++) {
+        xd->block[i].eob = 0;
+        xd->eobs[i] = 0;
+      }
 
-      xd->above_context = pc->above_context + mb_col + x_idx;
-      xd->left_context = pc->left_context + y_idx + (mb_row & 2);
-      xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
-      for (i = 0; i < 25; i++) {
-        xd->block[i].eob = 0;
-        xd->eobs[i] = 0;
+      eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
+      if (eobtotal == 0) {  // skip loopfilter
+        xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+        continue;
+      }
+
+      if (tx_size == TX_16X16) {
+        decode_16x16_sb(pbi, xd, bc, n, 1, 1);
+      } else if (tx_size == TX_8X8) {
+        decode_8x8_sb(pbi, xd, bc, n, 1, 1);
+      } else {
+        decode_4x4_sb(pbi, xd, bc, n, 1, 1);
+      }
     }
 
-      eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
-      if (eobtotal == 0) {  // skip loopfilter
-        xd->mode_info_context->mbmi.mb_skip_coeff = 1;
-        continue;
-      }
-
-      if (tx_size == TX_16X16) {
-        decode_16x16_sb(pbi, xd, bc, n, 1, 1);
-      } else if (tx_size == TX_8X8) {
-        decode_8x8_sb(pbi, xd, bc, n, 1, 1);
-      } else {
-        decode_4x4_sb(pbi, xd, bc, n, 1, 1);
-      }
+    xd->above_context = pc->above_context + mb_col;
+    xd->left_context = pc->left_context + (mb_row & 2);
+    xd->mode_info_context = orig_mi;
   }
-
-    xd->above_context = pc->above_context + mb_col;
-    xd->left_context = pc->left_context + (mb_row & 2);
-    xd->mode_info_context = orig_mi;
-#if CONFIG_TX32X32
-  }
-#endif
 }
-#endif
 
 static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
                               int mb_row, unsigned int mb_col,
@@ -959,9 +949,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
   int i;
   int tx_size;
 
-#if CONFIG_SUPERBLOCKS
   assert(!xd->mode_info_context->mbmi.sb_type);
-#endif
 
   // re-initialize macroblock dequantizer before detokenization
   if (xd->segmentation_enabled)
@@ -1096,9 +1084,7 @@ static void set_offsets(VP9D_COMP *pbi, int block_size,
   const int recon_uvoffset = mb_row * 8 * recon_uv_stride + 8 * mb_col;
 
   xd->mode_info_context = cm->mi + idx;
-#if CONFIG_SUPERBLOCKS
   xd->mode_info_context->mbmi.sb_type = block_size >> 5;
-#endif
   xd->prev_mode_info_context = cm->prev_mi + idx;
   xd->above_context = cm->above_context + mb_col;
   xd->left_context = cm->left_context + (mb_row & 3);
@@ -1173,7 +1159,6 @@ static void set_refs(VP9D_COMP *pbi, int block_size,
     }
   }
 
-#if CONFIG_SUPERBLOCKS
   if (mbmi->sb_type) {
     const int n_mbs = 1 << mbmi->sb_type;
     const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
@@ -1187,7 +1172,6 @@ static void set_refs(VP9D_COMP *pbi, int block_size,
       }
     }
   }
-#endif
 }
 
 /* Decode a row of Superblocks (2x2 region of MBs) */
@@ -1200,7 +1184,7 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
   vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
 
   for (mb_col = 0; mb_col < pc->mb_cols; mb_col += 4) {
-#if CONFIG_SUPERBLOCKS64 && CONFIG_SUPERBLOCKS
+#if CONFIG_SUPERBLOCKS64
     if (vp9_read(bc, pc->sb64_coded)) {
       set_offsets(pbi, 64, mb_row, mb_col);
       vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
@@ -1223,7 +1207,6 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
 
         xd->sb_index = j;
 
-#if CONFIG_SUPERBLOCKS
         if (vp9_read(bc, pc->sb32_coded)) {
           set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
           vp9_decode_mb_mode_mv(pbi,
@@ -1232,9 +1215,7 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc,
           decode_superblock32(pbi, xd,
                               mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
           xd->corrupted |= bool_error(bc);
-        } else
-#endif  // CONFIG_SUPERBLOCKS
-        {
+        } else {
           int i;
 
           // Process the 4 MBs within the SB in the order:
@@ -1426,7 +1407,7 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
     read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16,
                            BLOCK_TYPES_16X16);
   }
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   if (pbi->common.txfm_mode > ALLOW_16X16) {
     read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32);
   }
@@ -1615,23 +1596,21 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
     }
   }
 
-#if CONFIG_SUPERBLOCKS
 #if CONFIG_SUPERBLOCKS64
   pc->sb64_coded = vp9_read_literal(&header_bc, 8);
 #endif
   pc->sb32_coded = vp9_read_literal(&header_bc, 8);
-#endif
 
   /* Read the loop filter level and type */
   pc->txfm_mode = vp9_read_literal(&header_bc, 2);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   if (pc->txfm_mode == 3)
     pc->txfm_mode += vp9_read_bit(&header_bc);
 #endif
   if (pc->txfm_mode == TX_MODE_SELECT) {
     pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
     pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
 #endif
   }
@@ -1816,14 +1795,12 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
            pbi->common.fc.coef_probs_16x16);
   vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16,
            pbi->common.fc.hybrid_coef_probs_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_copy(pbi->common.fc.pre_coef_probs_32x32,
            pbi->common.fc.coef_probs_32x32);
 #endif
   vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
-#if CONFIG_SUPERBLOCKS
   vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob);
-#endif
   vp9_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob);
   vp9_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob);
   vp9_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob);
@@ -1839,13 +1816,11 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
   vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8);
   vp9_zero(pbi->common.fc.coef_counts_16x16);
   vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_zero(pbi->common.fc.coef_counts_32x32);
 #endif
   vp9_zero(pbi->common.fc.ymode_counts);
-#if CONFIG_SUPERBLOCKS
   vp9_zero(pbi->common.fc.sb_ymode_counts);
-#endif
   vp9_zero(pbi->common.fc.uv_mode_counts);
   vp9_zero(pbi->common.fc.bmode_counts);
   vp9_zero(pbi->common.fc.i8x8_mode_counts);
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index 72cd2771e..46d4d01da 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -348,10 +348,10 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
   }
 }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
-void vp9_dequant_idct_add_32x32(int16_t *input, const int16_t *dq,
-                                uint8_t *pred, uint8_t *dest, int pitch,
-                                int stride, int eob) {
+#if CONFIG_TX32X32
+void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
+                                  uint8_t *pred, uint8_t *dest, int pitch,
+                                  int stride, int eob) {
   int16_t output[1024];
   int i;
 
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h
index bbbc173a2..2a0ae80e8 100644
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -70,7 +70,6 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
                                      unsigned char *dest, int pitch,
                                      int stride, uint16_t eobs);
 
-#if CONFIG_SUPERBLOCKS
 void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,
                                                    const int16_t *dq,
                                                    unsigned char *dst,
                                                    int stride,
@@ -98,6 +97,5 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
                                                  int stride,
                                                  uint16_t *eobs,
                                                  MACROBLOCKD *xd);
-#endif
 
 #endif
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index bdf578b3e..1286f9cca 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -144,7 +144,7 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
         coef_counts = fc->hybrid_coef_counts_16x16;
       }
       break;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     case TX_32X32:
       coef_probs = fc->coef_probs_32x32;
       coef_counts = fc->coef_counts_32x32;
@@ -249,7 +249,7 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
   return eob;
 }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
 int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
                          MACROBLOCKD* const xd,
                          BOOL_DECODER* const bc) {
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
index da11e57ef..eb9bc53bc 100644
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -23,7 +23,7 @@ int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
 int vp9_decode_mb_tokens(VP9D_COMP* const, MACROBLOCKD* const,
                          BOOL_DECODER* const);
 
-#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
+#if CONFIG_TX32X32
 int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
                          MACROBLOCKD* const xd,
                          BOOL_DECODER* const bc);
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 6ca73f65a..152527cff 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -39,7 +39,6 @@ void vp9_dequant_dc_idct_add_y_block_c(int16_t *q, const int16_t *dq,
   }
 }
 
-#if CONFIG_SUPERBLOCKS
 void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
                                                    const int16_t *dq,
                                                    uint8_t *dst,
@@ -64,7 +63,6 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
     dst += 4 * stride - 16;
   }
 }
-#endif
 
 void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
                                     uint8_t *pre,
@@ -134,7 +132,6 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
   }
 }
 
-#if CONFIG_SUPERBLOCKS
 void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
                                                  uint8_t *dstu,
                                                  uint8_t *dstv,
@@ -175,7 +172,6 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
     dstv += 4 * stride - 8;
   }
 }
-#endif
 
 void vp9_dequant_dc_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
                                            uint8_t *pre,
@@ -200,7 +196,6 @@ void vp9_dequant_dc_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
                              xd->eobs[12]);
 }
 
-#if CONFIG_SUPERBLOCKS
 void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,
                                                    const int16_t *dq,
                                                    uint8_t *dst,
@@ -225,7 +220,6 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,
                              dst + 8 * stride + 8, stride, stride, 1,
                              xd->eobs[12]);
 }
-#endif
 
 void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
                                         uint8_t *pre,
@@ -259,7 +253,6 @@ void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq,
   vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]);
 }
 
-#if CONFIG_SUPERBLOCKS
 void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
                                                  uint8_t *dstu,
                                                  uint8_t *dstv,
@@ -273,7 +266,6 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
   vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0,
                              xd->eobs[20]);
 }
-#endif
 
 #if CONFIG_LOSSLESS
 void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index a8fdc6626..e03651493 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -146,11 +146,9 @@ static void update_mbintra_mode_probs(VP9_COMP* const cpi,
                 bc, VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree,
                 Pnew, cm->fc.ymode_prob, bct, (unsigned int *)cpi->ymode_count
                 );
-#if CONFIG_SUPERBLOCKS
     update_mode(bc, VP9_I32X32_MODES, vp9_sb_ymode_encodings,
                 vp9_sb_ymode_tree, Pnew, cm->fc.sb_ymode_prob, bct,
                 (unsigned int *)cpi->sb_ymode_count);
-#endif
   }
 }
 
@@ -318,7 +316,6 @@ static void kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_kf_ymode_tree, p, vp9_kf_ymode_encodings + m);
 }
 
-#if CONFIG_SUPERBLOCKS
 static void write_sb_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_sb_ymode_tree, p, vp9_sb_ymode_encodings + m);
 }
@@ -326,7 +323,6 @@ static void write_sb_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
 static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m);
 }
-#endif
 
 static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) {
   write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m);
@@ -492,7 +488,6 @@ static void write_mv_ref
               vp9_mv_ref_encoding_array - NEARESTMV + m);
 }
 
-#if CONFIG_SUPERBLOCKS
 static void write_sb_mv_ref(vp9_writer *bc, MB_PREDICTION_MODE m,
                             const vp9_prob *p) {
 #if CONFIG_DEBUG
@@ -501,7 +496,6 @@ static void write_sb_mv_ref(vp9_writer *bc, MB_PREDICTION_MODE m,
   write_token(bc, vp9_sb_mv_ref_tree, p,
               vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
 }
-#endif
 
 static void write_sub_mv_ref
 (
@@ -703,11 +697,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
   const MV_REFERENCE_FRAME rf = mi->ref_frame;
   const MB_PREDICTION_MODE mode = mi->mode;
   const int segment_id = mi->segment_id;
-#if CONFIG_SUPERBLOCKS
   const int mb_size = 1 << mi->sb_type;
-#else
-  const int mb_size = 1;
-#endif
   int skip_coeff;
 
   int mb_row = pc->mb_rows - mb_rows_left;
@@ -781,11 +771,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
 #endif
 
     if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
-#if CONFIG_SUPERBLOCKS
       if (m->mbmi.sb_type)
         write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
       else
-#endif
         write_ymode(bc, mode, pc->fc.ymode_prob);
     }
     if (mode == B_PRED) {
@@ -835,12 +823,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
 
       // Is the segment coding of mode enabled
       if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
-#if CONFIG_SUPERBLOCKS
         if (mi->sb_type) {
           write_sb_mv_ref(bc, mode, mv_ref_p);
-        } else
-#endif
-        {
+        } else {
           write_mv_ref(bc, mode, mv_ref_p);
         }
         vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
@@ -998,7 +983,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
       vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
       if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
         vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
         if (mi->sb_type && sz != TX_8X8)
           vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
 #endif
@@ -1027,11 +1012,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
       vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
     skip_coeff = 1;
   } else {
-#if CONFIG_SUPERBLOCKS
     const int nmbs = 1 << m->mbmi.sb_type;
-#else
-    const int nmbs = 1;
-#endif
     const int xmbs = MIN(nmbs, mb_cols_left);
     const int ymbs = MIN(nmbs, mb_rows_left);
     int x, y;
@@ -1047,13 +1028,10 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
               vp9_get_pred_prob(c, xd, PRED_MBSKIP));
   }
 
-#if CONFIG_SUPERBLOCKS
   if (m->mbmi.sb_type) {
     sb_kfwrite_ymode(bc, ym,
                      c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
-  } else
-#endif
-  {
+  } else {
     kfwrite_ymode(bc, ym,
                   c->kf_ymode_prob[c->kf_ymode_probs_index]);
   }
@@ -1111,7 +1089,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
     vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
     if (sz != TX_4X4 && ym <= TM_PRED) {
       vp9_write(bc, sz != TX_8X8, c->prob_tx[1]);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       if (m->mbmi.sb_type && sz != TX_8X8)
        vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
 #endif
@@ -1155,7 +1133,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
   for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) {
     m = m_ptr;
     for (mb_col = 0; mb_col < c->mb_cols; mb_col += 4, m += 4) {
-#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#if CONFIG_SUPERBLOCKS64
       vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded);
       if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
         write_modes_b(cpi, m, bc, &tok, tok_end, mb_row, mb_col);
@@ -1166,23 +1144,18 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
         for (j = 0; j < 4; j++) {
           const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2;
-#if CONFIG_SUPERBLOCKS
           MODE_INFO *sb_m = m + y_idx_sb * mis + x_idx_sb;
-#endif
 
           if (mb_col + x_idx_sb >= c->mb_cols ||
               mb_row + y_idx_sb >= c->mb_rows)
             continue;
 
-#if CONFIG_SUPERBLOCKS
           vp9_write(bc, sb_m->mbmi.sb_type, c->sb32_coded);
           if (sb_m->mbmi.sb_type) {
             assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32);
             write_modes_b(cpi, sb_m, bc, &tok, tok_end,
                           mb_row + y_idx_sb, mb_col + x_idx_sb);
-          } else
-#endif
-          {
+          } else {
             // Process the 4 MBs in the order:
             // top-left, top-right, bottom-left, bottom-right
             for (i = 0; i < 4; i++) {
@@ -1195,9 +1168,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
                 continue;
               }
 
-#if CONFIG_SUPERBLOCKS
               assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16);
-#endif
               write_modes_b(cpi, mb_m, bc, &tok, tok_end,
                             mb_row + y_idx, mb_col + x_idx);
             }
@@ -1305,7 +1276,7 @@ static void build_coeff_contexts(VP9_COMP *cpi) {
       cpi, hybrid_context_counters_16x16,
 #endif
       cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   build_tree_distribution(cpi->frame_coef_probs_32x32,
                           cpi->coef_counts_32x32,
 #ifdef ENTROPY_STATS
@@ -1489,7 +1460,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) {
                              BLOCK_TYPES_16X16);
   }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   if (cpi->common.txfm_mode > ALLOW_16X16) {
     update_coef_probs_common(bc,
 #ifdef ENTROPY_STATS
@@ -1535,13 +1506,11 @@ static void decide_kf_ymode_entropy(VP9_COMP *cpi) {
     for (j = 0; j < VP9_YMODES; j++) {
       cost += mode_cost[j] * cpi->ymode_count[j];
     }
-#if CONFIG_SUPERBLOCKS
     vp9_cost_tokens(mode_cost, cpi->common.sb_kf_ymode_prob[i],
                     vp9_sb_ymode_tree);
     for (j = 0; j < VP9_I32X32_MODES; j++) {
       cost += mode_cost[j] * cpi->sb_ymode_count[j];
     }
-#endif
     if (cost < bestcost) {
       bestindex = i;
       bestcost = cost;
@@ -1731,14 +1700,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
     }
   }
 
-#if CONFIG_SUPERBLOCKS
 #if CONFIG_SUPERBLOCKS64
   pc->sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]);
   vp9_write_literal(&header_bc, pc->sb64_coded, 8);
 #endif
   pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
   vp9_write_literal(&header_bc, pc->sb32_coded, 8);
-#endif
 
   {
     if (pc->txfm_mode == TX_MODE_SELECT) {
@@ -1748,7 +1715,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
                                 cpi->txfm_count_32x32p[TX_4X4] +
                                 cpi->txfm_count_32x32p[TX_8X8] +
                                 cpi->txfm_count_32x32p[TX_16X16] +
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
                                 cpi->txfm_count_32x32p[TX_32X32] +
 #endif
                                 cpi->txfm_count_16x16p[TX_4X4] +
@@ -1760,12 +1727,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
                                 cpi->txfm_count_16x16p[TX_8X8],
                                 cpi->txfm_count_32x32p[TX_8X8] +
                                 cpi->txfm_count_32x32p[TX_16X16] +
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
                                 cpi->txfm_count_32x32p[TX_32X32] +
 #endif
                                 cpi->txfm_count_16x16p[TX_8X8] +
                                 cpi->txfm_count_16x16p[TX_16X16]);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       pc->prob_tx[2] = get_prob(cpi->txfm_count_32x32p[TX_16X16],
                                 cpi->txfm_count_32x32p[TX_16X16] +
                                 cpi->txfm_count_32x32p[TX_32X32]);
@@ -1773,12 +1740,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
     } else {
       pc->prob_tx[0] = 128;
       pc->prob_tx[1] = 128;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       pc->prob_tx[2] = 128;
 #endif
     }
     vp9_write_literal(&header_bc, pc->txfm_mode <= 3 ? pc->txfm_mode : 3, 2);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     if (pc->txfm_mode > ALLOW_16X16) {
       vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT);
     }
@@ -1786,7 +1753,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
     if (pc->txfm_mode == TX_MODE_SELECT) {
       vp9_write_literal(&header_bc, pc->prob_tx[0], 8);
       vp9_write_literal(&header_bc, pc->prob_tx[1], 8);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       vp9_write_literal(&header_bc, pc->prob_tx[2], 8);
 #endif
     }
@@ -2009,13 +1976,11 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
             cpi->common.fc.coef_probs_16x16);
   vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16,
            cpi->common.fc.hybrid_coef_probs_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_copy(cpi->common.fc.pre_coef_probs_32x32,
            cpi->common.fc.coef_probs_32x32);
 #endif
-#if CONFIG_SUPERBLOCKS
   vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
-#endif
   vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
   vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
   vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 94078970b..18e5f73ff 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -35,13 +35,13 @@ typedef struct block {
   int16_t *zbin;
   int16_t *zbin_8x8;
   int16_t *zbin_16x16;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   int16_t *zbin_32x32;
 #endif
   int16_t *zrun_zbin_boost;
   int16_t *zrun_zbin_boost_8x8;
   int16_t *zrun_zbin_boost_16x16;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   int16_t *zrun_zbin_boost_32x32;
 #endif
   int16_t *round;
@@ -57,7 +57,7 @@ typedef struct block {
   int eob_max_offset;
   int eob_max_offset_8x8;
   int eob_max_offset_16x16;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   int eob_max_offset_32x32;
 #endif
 } BLOCK;
@@ -92,7 +92,7 @@ typedef struct {
   int64_t txfm_rd_diff[NB_TXFM_MODES];
 } PICK_MODE_CONTEXT;
 
-#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
+#if CONFIG_TX32X32
 typedef struct superblock {
   DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]);
   DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]);
@@ -102,16 +102,11 @@ typedef struct superblock {
 typedef struct macroblock {
   DECLARE_ALIGNED(16, int16_t, src_diff[400]);  // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
   DECLARE_ALIGNED(16, int16_t, coeff[400]);     // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
-#if !CONFIG_SUPERBLOCKS
-  DECLARE_ALIGNED(16, uint8_t, thismb[256]);    // 16x16 Y
-
-  unsigned char *thismb_ptr;
-#endif
   // 16 Y blocks, 4 U blocks, 4 V blocks,
   // 1 DC 2nd order block each with 16 entries
   BLOCK block[25];
 
-#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
+#if CONFIG_TX32X32
   SUPERBLOCK sb_coeff_data;
 #endif
 
@@ -183,13 +178,11 @@ typedef struct macroblock {
   // Structure to hold context for each of the 4 MBs within a SB:
   // when encoded as 4 independent MBs:
   PICK_MODE_CONTEXT mb_context[4][4];
-#if CONFIG_SUPERBLOCKS
   // when 4 MBs share coding parameters:
   PICK_MODE_CONTEXT sb32_context[4];
 #if CONFIG_SUPERBLOCKS64
   PICK_MODE_CONTEXT sb64_context;
 #endif  // CONFIG_SUPERBLOCKS64
-#endif
 
   void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
   void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 509c426d8..ad27c6f39 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -285,10 +285,6 @@ static void build_activity_map(VP9_COMP *cpi) {
       xd->left_available = (mb_col != 0);
       recon_yoffset += 16;
 #endif
-#if !CONFIG_SUPERBLOCKS
-      // Copy current mb to a buffer
-      vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-#endif
 
       // measure activity
       mb_activity = mb_activity_measure(cpi, x, mb_row, mb_col);
@@ -444,20 +440,14 @@ static void update_state(VP9_COMP *cpi,
   int mb_mode = mi->mbmi.mode;
   int mb_mode_index = ctx->best_mode_index;
   const int mis = cpi->common.mode_info_stride;
-#if CONFIG_SUPERBLOCKS
   int mb_block_size = 1 << mi->mbmi.sb_type;
-#else
-  int mb_block_size = 1;
-#endif
 
 #if CONFIG_DEBUG
   assert(mb_mode < MB_MODE_COUNT);
   assert(mb_mode_index < MAX_MODES);
   assert(mi->mbmi.ref_frame < MAX_REF_FRAMES);
 #endif
-#if CONFIG_SUPERBLOCKS
   assert(mi->mbmi.sb_type == (block_size >> 5));
-#endif
 
   // Restore the coding context of the MB to that that was in place
   // when the mode was picked for it
@@ -471,7 +461,7 @@ static void update_state(VP9_COMP *cpi,
       }
     }
   }
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   if (block_size == 16) {
     ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
   }
@@ -787,18 +777,11 @@ static void pick_mb_modes(VP9_COMP *cpi,
       set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
                   &recon_yoffset, &recon_uvoffset);
 
-#if !CONFIG_SUPERBLOCKS
-      // Copy current MB to a work buffer
-      vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-#endif
-
       if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
         vp9_activity_masking(cpi, x);
 
       mbmi = &xd->mode_info_context->mbmi;
-#if CONFIG_SUPERBLOCKS
       mbmi->sb_type = BLOCK_SIZE_MB16X16;
-#endif
 
       cpi->update_context = 0;    // TODO Do we need this now??
@@ -869,7 +852,6 @@ static void pick_mb_modes(VP9_COMP *cpi,
          sizeof(above_context));
 }
 
-#if CONFIG_SUPERBLOCKS
 static void pick_sb_modes(VP9_COMP *cpi,
                           int mb_row,
                           int mb_col,
@@ -943,7 +925,6 @@ static void pick_sb64_modes(VP9_COMP *cpi,
   }
 }
 #endif  // CONFIG_SUPERBLOCKS64
-#endif  // CONFIG_SUPERBLOCKS
 
 static void update_stats(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
@@ -1011,7 +992,6 @@ static void encode_sb(VP9_COMP *cpi,
   MACROBLOCKD *const xd = &x->e_mbd;
   int recon_yoffset, recon_uvoffset;
 
-#if CONFIG_SUPERBLOCKS
   cpi->sb32_count[is_sb]++;
   if (is_sb) {
     set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
@@ -1028,9 +1008,7 @@ static void encode_sb(VP9_COMP *cpi,
       if (mb_row < cm->mb_rows)
         cpi->tplist[mb_row].stop = *tp;
     }
-  } else
-#endif
-  {
+  } else {
     int i;
 
     for (i = 0; i < 4; i++) {
@@ -1046,11 +1024,6 @@ static void encode_sb(VP9_COMP *cpi,
       xd->mb_index = i;
       update_state(cpi, &x->mb_context[xd->sb_index][i], 16, output_enabled);
 
-#if !CONFIG_SUPERBLOCKS
-      // Copy current MB to a work buffer
-      vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-#endif
-
       if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
         vp9_activity_masking(cpi, x);
 
@@ -1081,7 +1054,7 @@ static void encode_sb(VP9_COMP *cpi,
 #endif
 }
 
-#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#if CONFIG_SUPERBLOCKS64
 static void encode_sb64(VP9_COMP *cpi,
                         int mb_row,
                         int mb_col,
@@ -1121,7 +1094,7 @@ static void encode_sb64(VP9_COMP *cpi,
     }
   }
 }
-#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#endif  // CONFIG_SUPERBLOCKS64
 
 static void encode_sb_row(VP9_COMP *cpi,
                           int mb_row,
@@ -1141,20 +1114,18 @@ static void encode_sb_row(VP9_COMP *cpi,
     int i;
     int sb32_rate = 0, sb32_dist = 0;
     int is_sb[4];
-#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#if CONFIG_SUPERBLOCKS64
    int sb64_rate = INT_MAX, sb64_dist;
    ENTROPY_CONTEXT_PLANES l[4], a[4];
    TOKENEXTRA *tp_orig = *tp;
 
     memcpy(&a, cm->above_context + mb_col, sizeof(a));
     memcpy(&l, cm->left_context, sizeof(l));
-#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#endif  // CONFIG_SUPERBLOCKS64
     for (i = 0; i < 4; i++) {
       const int x_idx = (i & 1) << 1, y_idx = i & 2;
       int mb_rate = 0, mb_dist = 0;
-#if CONFIG_SUPERBLOCKS
       int sb_rate = INT_MAX, sb_dist;
-#endif
 
       if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols)
         continue;
@@ -1163,11 +1134,8 @@ static void encode_sb_row(VP9_COMP *cpi,
       pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
                     tp, &mb_rate, &mb_dist);
-#if CONFIG_SUPERBLOCKS
       mb_rate += vp9_cost_bit(cm->sb32_coded, 0);
-#endif
 
-#if CONFIG_SUPERBLOCKS
       if (!((( mb_cols & 1) && mb_col + x_idx == mb_cols - 1) ||
             ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) {
         /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
@@ -1183,12 +1151,8 @@ static void encode_sb_row(VP9_COMP *cpi,
         is_sb[i] = 1;
         sb32_rate += sb_rate;
         sb32_dist += sb_dist;
-      } else
-#endif
-      {
-#if CONFIG_SUPERBLOCKS
+      } else {
         is_sb[i] = 0;
-#endif
         sb32_rate += mb_rate;
         sb32_dist += mb_dist;
       }
@@ -1200,11 +1164,10 @@ static void encode_sb_row(VP9_COMP *cpi,
       // instead of small->big) means we can use as threshold for small, which
       // may enable breakouts if RD is not good enough (i.e. faster)
       encode_sb(cpi, mb_row + y_idx, mb_col + x_idx,
-                !(CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64),
-                tp, is_sb[i]);
+                !CONFIG_SUPERBLOCKS64, tp, is_sb[i]);
     }
 
-#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#if CONFIG_SUPERBLOCKS64
     memcpy(cm->above_context + mb_col, &a, sizeof(a));
     memcpy(cm->left_context, &l, sizeof(l));
 
    sb32_rate += vp9_cost_bit(cm->sb64_coded, 0);
@@ -1227,11 +1190,11 @@ static void encode_sb_row(VP9_COMP *cpi,
       *totalrate += sb32_rate;
     }
 
-#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#if CONFIG_SUPERBLOCKS64
     assert(tp_orig == *tp);
     encode_sb64(cpi, mb_row, mb_col, tp, is_sb);
     assert(tp_orig < *tp);
-#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#endif  // CONFIG_SUPERBLOCKS64
   }
 }
 
@@ -1279,13 +1242,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
   vp9_zero(cpi->sub_mv_ref_count)
   vp9_zero(cpi->mbsplit_count)
   vp9_zero(cpi->common.fc.mv_ref_ct)
-#if CONFIG_SUPERBLOCKS
   vp9_zero(cpi->sb_ymode_count)
   vp9_zero(cpi->sb32_count);
 #if CONFIG_SUPERBLOCKS64
   vp9_zero(cpi->sb64_count);
 #endif  // CONFIG_SUPERBLOCKS64
-#endif  // CONFIG_SUPERBLOCKS
 #if CONFIG_COMP_INTERINTRA_PRED
   vp9_zero(cpi->interintra_count);
   vp9_zero(cpi->interintra_select_count);
@@ -1362,7 +1323,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   vp9_zero(cpi->hybrid_coef_counts_8x8);
   vp9_zero(cpi->coef_counts_16x16);
   vp9_zero(cpi->hybrid_coef_counts_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_zero(cpi->coef_counts_32x32);
 #endif
 #if CONFIG_NEW_MVREF
@@ -1462,7 +1423,6 @@ static void reset_skip_txfm_size_mb(VP9_COMP *cpi,
   }
 }
 
-#if CONFIG_SUPERBLOCKS
 static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) {
   int x, y;
 
@@ -1530,7 +1490,6 @@ static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi,
     }
   }
 }
 #endif
-#endif
 
 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
   VP9_COMMON *const cm = &cpi->common;
@@ -1541,33 +1500,28 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
   for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
     mi = mi_ptr;
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
-#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#if CONFIG_SUPERBLOCKS64
      if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
        reset_skip_txfm_size_sb64(cpi, mi, mis, txfm_max,
                                  cm->mb_rows - mb_row, cm->mb_cols - mb_col);
      } else
-#endif  // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+#endif  // CONFIG_SUPERBLOCKS64
       {
         int i;
 
         for (i = 0; i < 4; i++) {
           const int x_idx_sb = (i & 1) << 1, y_idx_sb = i & 2;
-#if CONFIG_SUPERBLOCKS
           MODE_INFO *sb_mi = mi + y_idx_sb * mis + x_idx_sb;
-#endif
 
          if (mb_row + y_idx_sb >= cm->mb_rows ||
              mb_col + x_idx_sb >= cm->mb_cols)
            continue;
 
-#if CONFIG_SUPERBLOCKS
           if (sb_mi->mbmi.sb_type) {
             reset_skip_txfm_size_sb32(cpi, sb_mi, mis, txfm_max,
                                       cm->mb_rows - mb_row - y_idx_sb,
                                       cm->mb_cols - mb_col - x_idx_sb);
-          } else
-#endif
-          {
+          } else {
             int m;
 
             for (m = 0; m < 4; m++) {
@@ -1579,9 +1533,7 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
                 continue;
 
               mb_mi = mi + y_idx * mis + x_idx;
-#if CONFIG_SUPERBLOCKS
               assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
-#endif
               reset_skip_txfm_size_mb(cpi, mb_mi, txfm_max);
             }
           }
@@ -1647,7 +1599,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
      * keyframe's probabilities as an estimate of what the current keyframe's
      * coefficient cost distributions may look like. */
     if (frame_type == 0) {
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       txfm_type = ALLOW_32X32;
 #else
       txfm_type = ALLOW_16X16;
@@ -1682,7 +1634,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
       } else
         txfm_type = ALLOW_8X8;
 #else
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >=
                   cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
                   ALLOW_32X32 : TX_MODE_SELECT;
@@ -1742,7 +1694,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8];
     const int count16x16_16x16p = cpi->txfm_count_16x16p[TX_16X16];
     const int count16x16_lp = cpi->txfm_count_32x32p[TX_16X16];
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     const int count32x32 = cpi->txfm_count_32x32p[TX_32X32];
 #else
     const int count32x32 = 0;
@@ -1756,13 +1708,13 @@ void vp9_encode_frame(VP9_COMP *cpi) {
         count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
       cpi->common.txfm_mode = ONLY_4X4;
       reset_skip_txfm_size(cpi, TX_4X4);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
       cpi->common.txfm_mode = ALLOW_32X32;
 #endif
     } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
       cpi->common.txfm_mode = ALLOW_16X16;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       reset_skip_txfm_size(cpi, TX_16X16);
 #endif
     }
@@ -1814,22 +1766,6 @@ void vp9_build_block_offsets(MACROBLOCK *x) {
 
   vp9_build_block_doffsets(&x->e_mbd);
 
-#if !CONFIG_SUPERBLOCKS
-  // y blocks
-  x->thismb_ptr = &x->thismb[0];
-  for (br = 0; br < 4; br++) {
-    for (bc = 0; bc < 4; bc++) {
-      BLOCK *this_block = &x->block[block];
-      // this_block->base_src = &x->src.y_buffer;
-      // this_block->src_stride = x->src.y_stride;
-      // this_block->src = 4 * br * this_block->src_stride + 4 * bc;
-      this_block->base_src = &x->thismb_ptr;
-      this_block->src_stride = 16;
-      this_block->src = 4 * br * 16 + 4 * bc;
-      ++block;
-    }
-  }
-#else
   for (br = 0; br < 4; br++) {
     for (bc = 0; bc < 4; bc++) {
       BLOCK *this_block = &x->block[block];
@@ -1842,7 +1778,6 @@ void vp9_build_block_offsets(MACROBLOCK *x) {
       ++block;
     }
   }
-#endif
 
   // u blocks
   for (br = 0; br < 2; br++) {
@@ -1896,12 +1831,11 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
   }
 #endif
 
-#if CONFIG_SUPERBLOCKS
   if (xd->mode_info_context->mbmi.sb_type) {
     ++cpi->sb_ymode_count[m];
-  } else
-#endif
+  } else {
     ++cpi->ymode_count[m];
+  }
   if (m != I8X8_PRED)
     ++cpi->y_uv_mode_count[m][uvm];
   else {
@@ -1943,7 +1877,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
 #endif
 }
 
-#if CONFIG_SUPERBLOCKS
 static void update_sb_skip_coeff_state(VP9_COMP *cpi,
                                        ENTROPY_CONTEXT_PLANES ta[4],
                                        ENTROPY_CONTEXT_PLANES tl[4],
@@ -2010,6 +1943,7 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
                                          int skip[16], int output_enabled) {
   MACROBLOCK *const x = &cpi->mb;
 
+#if CONFIG_TX32X32
   if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) {
     TOKENEXTRA tokens[4][1024+512];
     int n_tokens[4], n;
@@ -2057,7 +1991,9 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
         (*tp) += n_tokens[n];
       }
     }
-  } else {
+  } else
+#endif  // CONFIG_TX32X32
+  {
     TOKENEXTRA tokens[16][16 * 25];
     int n_tokens[16], n;
 
@@ -2113,7 +2049,6 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
   }
 }
 #endif  // CONFIG_SUPERBLOCKS64
-#endif  /* CONFIG_SUPERBLOCKS */
 
 static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
                               int recon_yoffset, int recon_uvoffset,
@@ -2125,9 +2060,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
   MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
   unsigned char ref_pred_flag;
 
-#if CONFIG_SUPERBLOCKS
   assert(!xd->mode_info_context->mbmi.sb_type);
-#endif
 
 #ifdef ENC_DEBUG
   enc_debug = (cpi->common.current_video_frame == 46 &&
@@ -2375,7 +2308,6 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
   }
 }
 
-#if CONFIG_SUPERBLOCKS
 static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
                                 int recon_yoffset, int recon_uvoffset,
                                 int output_enabled, int mb_row, int mb_col) {
@@ -2769,8 +2701,7 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
       vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
       vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
       vp9_recon_sby_s_c(&x->e_mbd,
-                        dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
-                        dst_y_stride);
+                        dst + 32 * x_idx + 32 * y_idx * dst_y_stride);
       vp9_recon_sbuv_s_c(&x->e_mbd,
                          udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
                          vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride);
@@ -2867,9 +2798,15 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
   if (output_enabled) {
     if (cm->txfm_mode == TX_MODE_SELECT &&
         !((cm->mb_no_coeff_skip &&
-           ((mi->mbmi.txfm_size == TX_32X32 &&
+           (
+#if CONFIG_TX32X32
+            (mi->mbmi.txfm_size == TX_32X32 &&
              skip[0] && skip[1] && skip[2] && skip[3]) ||
-            (mi->mbmi.txfm_size != TX_32X32 &&
+#endif  // CONFIG_TX32X32
+            (
+#if CONFIG_TX32X32
+             mi->mbmi.txfm_size != TX_32X32 &&
+#endif  // CONFIG_TX32X32
              skip[0] && skip[1] && skip[2] && skip[3] &&
              skip[4] && skip[5] && skip[6] && skip[7] &&
              skip[8] && skip[9] && skip[10] && skip[11] &&
@@ -2897,4 +2834,3 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
     }
   }
 }
 #endif  // CONFIG_SUPERBLOCKS64
-#endif
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 2ca146c3b..216a7fb95 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -108,7 +108,7 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
   }
 }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
 void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
                           const uint8_t *pred, int dst_stride) {
   int r, c;
@@ -311,7 +311,7 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) {
   vp9_transform_mbuv_8x8(x);
 }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
 void vp9_transform_sby_32x32(MACROBLOCK *x) {
   SUPERBLOCK * const x_sb = &x->sb_coeff_data;
   vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64);
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 75c3a8a8b..084d20ed9 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -46,7 +46,7 @@ void vp9_transform_mb_16x16(MACROBLOCK *mb);
 void vp9_transform_mby_16x16(MACROBLOCK *x);
 void vp9_optimize_mby_16x16(MACROBLOCK *x);
 
-#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
+#if CONFIG_TX32X32
 void vp9_transform_sby_32x32(MACROBLOCK *x);
 void vp9_transform_sbuv_16x16(MACROBLOCK *x);
 #endif
@@ -55,7 +55,6 @@ void vp9_fidct_mb(MACROBLOCK *x);
 
 void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
 
-#if CONFIG_SUPERBLOCKS
 void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc,
                            const uint8_t *vsrc, int src_stride,
                            const uint8_t *upred,
@@ -71,6 +70,5 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
                            const uint8_t *upred,
                            const uint8_t *vpred, int dst_stride);
 #endif  // CONFIG_TX32X32
-#endif  // CONFIG_SUPERBLOCKS
 
 #endif  // VP9_ENCODER_VP9_ENCODEMB_H_
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index a14867292..44b140319 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -516,11 +516,6 @@ void vp9_first_pass(VP9_COMP *cpi) {
       xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
       xd->left_available = (mb_col != 0);
 
-#if !CONFIG_SUPERBLOCKS
-      // Copy current mb to a buffer
-      vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
-#endif
-
       // do intra 16x16 prediction
       this_error = vp9_encode_intra(cpi, x, use_dc_pred);
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 9769d6344..4694a92c6 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -267,31 +267,9 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   int offset;
   int usehp = xd->allow_high_precision_mv;
 
-#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
-  uint8_t *y0 = *(d->base_pre) + d->pre +
-                (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
-  uint8_t *y;
-  int buf_r1, buf_r2, buf_c1, buf_c2;
-
-  // Clamping to avoid out-of-range data access
-  buf_r1 = ((bestmv->as_mv.row - VP9_INTERP_EXTEND) < x->mv_row_min) ?
-      (bestmv->as_mv.row - x->mv_row_min) : VP9_INTERP_EXTEND - 1;
-  buf_r2 = ((bestmv->as_mv.row + VP9_INTERP_EXTEND) > x->mv_row_max) ?
-      (x->mv_row_max - bestmv->as_mv.row) : VP9_INTERP_EXTEND - 1;
-  buf_c1 = ((bestmv->as_mv.col - VP9_INTERP_EXTEND) < x->mv_col_min) ?
-      (bestmv->as_mv.col - x->mv_col_min) : VP9_INTERP_EXTEND - 1;
-  buf_c2 = ((bestmv->as_mv.col + VP9_INTERP_EXTEND) > x->mv_col_max) ?
-      (x->mv_col_max - bestmv->as_mv.col) : VP9_INTERP_EXTEND - 1;
-  y_stride = 32;
-
-  /* Copy to intermediate buffer before searching. */
-  vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2);
-  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
-#else
   uint8_t *y = *(d->base_pre) + d->pre +
                (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
   y_stride = d->pre_stride;
-#endif
 
   rr = ref_mv->as_mv.row;
   rc = ref_mv->as_mv.col;
@@ -463,20 +441,9 @@ int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   MACROBLOCKD *xd = &x->e_mbd;
   int usehp = xd->allow_high_precision_mv;
 
-#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
-  uint8_t *y0 = *(d->base_pre) + d->pre +
-                (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
-  uint8_t *y;
-
-  y_stride = 32;
-  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
-  vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
-  y = xd->y_buf + y_stride + 1;
-#else
   uint8_t *y = *(d->base_pre) + d->pre +
                (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
   y_stride = d->pre_stride;
-#endif
 
   // central mv
   bestmv->as_mv.row <<= 3;
@@ -943,20 +910,9 @@ int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   int y_stride;
   MACROBLOCKD *xd = &x->e_mbd;
 
-#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
-  uint8_t *y0 = *(d->base_pre) + d->pre +
-                (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
-  uint8_t *y;
-
-  y_stride = 32;
-  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
-  vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
-  y = xd->y_buf + y_stride + 1;
-#else
   uint8_t *y = *(d->base_pre) + d->pre +
                (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
   y_stride = d->pre_stride;
-#endif
 
   // central mv
   bestmv->as_mv.row <<= 3;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0e4b47ddf..a56fcc0c0 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1761,11 +1761,9 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   cm->prob_last_coded = 128;
   cm->prob_gf_coded = 128;
   cm->prob_intra_coded = 63;
-#if CONFIG_SUPERBLOCKS
   cm->sb32_coded = 200;
 #if CONFIG_SUPERBLOCKS64
   cm->sb64_coded = 200;
-#endif
 #endif
   for (i = 0; i < COMP_PRED_CONTEXTS; i++)
     cm->prob_comppred[i] = 128;
@@ -1966,7 +1964,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
 
     cpi->fn_ptr[BT].sdx4df            = SDX4DF;
 
-#if CONFIG_SUPERBLOCKS
     BFP(BLOCK_32X32, vp9_sad32x32, vp9_variance32x32, vp9_sub_pixel_variance32x32,
         vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v,
         vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
@@ -1977,7 +1974,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
         vp9_variance_halfpixvar64x64_h, vp9_variance_halfpixvar64x64_v,
         vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
         vp9_sad64x64x4d)
-#endif
 #endif
 
     BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16,
@@ -3655,14 +3651,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
     vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16,
             cpi->hybrid_coef_counts_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32);
 #endif
     vp9_adapt_coef_probs(&cpi->common);
     if (cpi->common.frame_type != KEY_FRAME) {
-#if CONFIG_SUPERBLOCKS
       vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count);
-#endif
      vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
      vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count);
      vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index c9ee14425..dbe6e2bd6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -100,13 +100,11 @@ typedef struct {
   vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
   vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16];
   vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
 #endif
 
-#if CONFIG_SUPERBLOCKS
   vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
-#endif
   vp9_prob ymode_prob[VP9_YMODES - 1];   /* interframe intra mode probs */
   vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
   vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
@@ -390,15 +388,11 @@ enum BlockSize {
   BLOCK_4X4 = PARTITIONING_4X4,
   BLOCK_16X16,
   BLOCK_MAX_SEGMENTS,
-#if CONFIG_SUPERBLOCKS
   BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
 #if CONFIG_SUPERBLOCKS64
   BLOCK_64X64,
 #endif  // CONFIG_SUPERBLOCKS64
   BLOCK_MAX_SB_SEGMENTS,
-#else  // CONFIG_SUPERBLOCKS
-  BLOCK_MAX_SB_SEGMENTS = BLOCK_MAX_SEGMENTS,
-#endif  // CONFIG_SUPERBLOCKS
 };
 
 typedef struct VP9_COMP {
@@ -436,7 +430,7 @@ typedef struct VP9_COMP {
   DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]);
   DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]);
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   DECLARE_ALIGNED(16, short, Y1zbin_32x32[QINDEX_RANGE][1024]);
   DECLARE_ALIGNED(16, short, Y2zbin_32x32[QINDEX_RANGE][1024]);
   DECLARE_ALIGNED(16, short, UVzbin_32x32[QINDEX_RANGE][1024]);
@@ -577,13 +571,11 @@ typedef struct VP9_COMP {
 
   int cq_target_quality;
 
-#if CONFIG_SUPERBLOCKS
   int sb32_count[2];
 #if CONFIG_SUPERBLOCKS64
   int sb64_count[2];
 #endif
   int sb_ymode_count [VP9_I32X32_MODES];
-#endif
   int ymode_count[VP9_YMODES];         /* intra MB type cts this frame */
   int bmode_count[VP9_NKF_BINTRAMODES];
   int i8x8_mode_count[VP9_I8X8_MODES];
@@ -618,7 +610,7 @@ typedef struct VP9_COMP {
   vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
   vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16];
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
   vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32];
   vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32];
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 051bd6461..98396a1a7 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -379,7 +379,7 @@ void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
                &d->eob, vp9_default_zig_zag1d_16x16, 1);
 }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
 void vp9_quantize_sby_32x32(MACROBLOCK *x) {
   x->e_mbd.block[0].eob = 0;
   quantize(x->block[0].zrun_zbin_boost_32x32,
@@ -472,7 +472,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
     48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
  };
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
  static const int zbin_boost_32x32[1024] = {
    0,  0,  0,  8,  8,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
   30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
@@ -569,7 +569,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
         ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
     cpi->zrun_zbin_boost_y1_16x16[Q][0] =
         ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     cpi->Y1zbin_32x32[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
     cpi->zrun_zbin_boost_y1_32x32[Q][0] =
         ((quant_val * zbin_boost_32x32[0]) + 64) >> 7;
@@ -677,7 +677,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
       cpi->zrun_zbin_boost_uv_16x16[Q][i] =
           ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
     }
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     // 32x32 structures. Same comment above applies.
     for (i = 1; i < 1024; i++) {
       int rc = vp9_default_zig_zag1d_32x32[i];
@@ -727,7 +727,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
     x->block[i].zbin = cpi->Y1zbin[QIndex];
     x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
     x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     x->block[i].zbin_32x32 = cpi->Y1zbin_32x32[QIndex];
 #endif
     x->block[i].round = cpi->Y1round[QIndex];
@@ -735,7 +735,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
     x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
     x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
     x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex];
 #endif
     x->block[i].zbin_extra = (int16_t)zbin_extra;
@@ -748,7 +748,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
           vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
       x->block[i].eob_max_offset_16x16 =
           vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       x->block[i].eob_max_offset_32x32 =
           vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
 #endif
@@ -756,7 +756,7 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
       x->block[i].eob_max_offset = 16;
       x->block[i].eob_max_offset_8x8 = 64;
       x->block[i].eob_max_offset_16x16 = 256;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       x->block[i].eob_max_offset_32x32 = 1024;
 #endif
     }
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index dbc3246b5..4ca3081db 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -78,7 +78,7 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x);
 extern prototype_quantize_block(vp9_quantize_quantb_16x16);
 extern prototype_quantize_mb(vp9_quantize_mby_16x16);
 
-#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32
+#if CONFIG_TX32X32
 void vp9_quantize_sby_32x32(MACROBLOCK *x);
 void vp9_quantize_sbuv_16x16(MACROBLOCK *x);
 #endif
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index ee87fba03..353ab979f 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -139,9 +139,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
   vp9_copy(cc->vp9_mode_contexts, cm->fc.vp9_mode_contexts);
 
   vp9_copy(cc->ymode_prob, cm->fc.ymode_prob);
-#if CONFIG_SUPERBLOCKS
   vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob);
-#endif
   vp9_copy(cc->bmode_prob, cm->fc.bmode_prob);
   vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob);
   vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob);
@@ -175,7 +173,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
   vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
   vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
   vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32);
 #endif
   vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
@@ -200,9 +198,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
   vp9_copy(cm->fc.vp9_mode_contexts, cc->vp9_mode_contexts);
 
   vp9_copy(cm->fc.ymode_prob, cc->ymode_prob);
-#if CONFIG_SUPERBLOCKS
   vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob);
-#endif
   vp9_copy(cm->fc.bmode_prob, cc->bmode_prob);
   vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob);
   vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
@@ -237,7 +233,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
   vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
   vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
   vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32);
 #endif
   vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8e91d828f..c695c049a 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -385,7 +385,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
       fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
                        cpi->common.fc.hybrid_coef_probs_16x16,
                        BLOCK_TYPES_16X16);
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       fill_token_costs(cpi->mb.token_costs[TX_32X32],
                        cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
 #endif
@@ -527,7 +527,7 @@ static int cost_coeffs(MACROBLOCK *mb,
   int pt;
   const int eob = b->eob;
   MACROBLOCKD *xd = &mb->e_mbd;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   const int ib = (int)(b - xd->block);
 #endif
   int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
@@ -572,14 +572,14 @@ static int cost_coeffs(MACROBLOCK *mb,
       scan = vp9_default_zig_zag1d_16x16;
       band = vp9_coef_bands_16x16;
       seg_eob = 256;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       if (type == PLANE_TYPE_UV) {
        const int uv_idx = ib - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
      }
 #endif
       break;
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
     case TX_32X32:
       scan = vp9_default_zig_zag1d_32x32;
       band = vp9_coef_bands_32x32;
@@ -853,7 +853,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
     }
   }
 
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   if (max_txfm_size == TX_32X32 &&
       (cm->txfm_mode == ALLOW_32X32 ||
        (cm->txfm_mode == TX_MODE_SELECT &&
@@ -863,7 +863,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
   } else
 #endif
   if ( cm->txfm_mode == ALLOW_16X16 ||
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
       (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
 #endif
       (cm->txfm_mode == TX_MODE_SELECT &&
@@ -884,7 +884,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
   txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
   txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
   txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
-#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
+#if CONFIG_TX32X32
   txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
   if (max_txfm_size == TX_32X32 &&
       rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
@@ -925,7 +925,6 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
   d[12] = p[12];
 }
 
-#if CONFIG_SUPERBLOCKS
 #if CONFIG_TX32X32
 static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
   MACROBLOCKD * const xd = &x->e_mbd;
@@ -1075,6 +1074,7 @@ static void super_block_yrd(VP9_COMP *cpi,
   xd->left_context = orig_left;
 }
 
+#if CONFIG_SUPERBLOCKS64
 static void super_block_64_yrd(VP9_COMP *cpi,
                                MACROBLOCK *x, int *rate, int *distortion,
                                int *skip,
@@ -1172,7 +1172,7 @@ static void super_block_64_yrd(VP9_COMP *cpi,
   xd->above_context = orig_above;
   xd->left_context = orig_left;
 }
-#endif
+#endif  // CONFIG_SUPERBLOCKS64
 
 static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
   const unsigned int *p = (const unsigned int *)predictor;
@@ -1426,7 +1426,6 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat
   return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
 }
 
-#if CONFIG_SUPERBLOCKS
 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int *rate,
@@ -1508,7 +1507,6 @@ static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi,
   return best_rd;
 }
 #endif  // CONFIG_SUPERBLOCKS64
-#endif
 
 static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi,
                                           MACROBLOCK *x,
@@ -1882,7 +1880,6 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
 
-#if CONFIG_SUPERBLOCKS
 #if CONFIG_TX32X32
 static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) {
   int b;
@@ -1939,56 +1936,56 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                           usrc, vsrc, src_uv_stride,
                           udst, vdst, dst_uv_stride);
     rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1);
-  } else {
+  } else
 #endif
-    int n, r = 0, d = 0;
-    int skippable = 1;
-    ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
-    ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
-    ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+  {
+    int n, r = 0, d = 0;
+    int skippable = 1;
+    ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+    ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
+    ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
 
-    memcpy(t_above, xd->above_context, sizeof(t_above));
-    memcpy(t_left, xd->left_context, sizeof(t_left));
+    memcpy(t_above, xd->above_context, sizeof(t_above));
+    memcpy(t_left, xd->left_context, sizeof(t_left));
 
-    for (n = 0; n < 4; n++) {
-      int x_idx = n & 1, y_idx = n >> 1;
-      int d_tmp, s_tmp, r_tmp;
+    for (n = 0; n < 4; n++) {
+      int x_idx = n & 1, y_idx = n >> 1;
+      int d_tmp, s_tmp, r_tmp;
 
-      xd->above_context = ta + x_idx;
-      xd->left_context = tl + y_idx;
-      vp9_subtract_mbuv_s_c(x->src_diff,
-                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
-                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
-                            src_uv_stride,
-                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
-                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
-                            dst_uv_stride);
+      xd->above_context = ta + x_idx;
+      xd->left_context = tl + y_idx;
+      vp9_subtract_mbuv_s_c(x->src_diff,
+                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            src_uv_stride,
+                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            dst_uv_stride);
 
-      if (mbmi->txfm_size == TX_4X4) {
-        rd_inter16x16_uv_4x4(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
-      } else {
-        rd_inter16x16_uv_8x8(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
+      if (mbmi->txfm_size == TX_4X4) {
+        rd_inter16x16_uv_4x4(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
+      } else {
+        rd_inter16x16_uv_8x8(cpi, x, &r_tmp, &d_tmp, fullpixel, &s_tmp, 0);
+      }
+
+      r += r_tmp;
+      d += d_tmp;
+      skippable = skippable && s_tmp;
     }
 
-      r += r_tmp;
-      d += d_tmp;
-      skippable = skippable && s_tmp;
+    *rate = r;
+    *distortion = d;
+    *skip = skippable;
+    xd->left_context = tl;
+    xd->above_context = ta;
+    memcpy(xd->above_context, t_above, sizeof(t_above));
+    memcpy(xd->left_context, t_left, sizeof(t_left));
   }
 
-    *rate = r;
-    *distortion = d;
-    *skip = skippable;
-    xd->left_context = tl;
-    xd->above_context = ta;
-    memcpy(xd->above_context, t_above, sizeof(t_above));
-    memcpy(xd->left_context, t_left, sizeof(t_left));
-#if CONFIG_TX32X32
-  }
-#endif
-
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
 
+#if CONFIG_SUPERBLOCKS64
 static void super_block_64_uvrd(MACROBLOCK *x, int *rate, int *distortion,
                                 int *skip);
 static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -1996,7 +1993,7 @@ static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
   super_block_64_uvrd(x, rate, distortion, skip);
   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 }
-#endif
+#endif  // CONFIG_SUPERBLOCKS64
 
 static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                               int *distortion, int *skip, int fullpixel) {
@@ -2130,7 +2127,6 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi,
   mbmi->uv_mode = mode_selected;
 }
 
-#if CONFIG_SUPERBLOCKS
 // TODO(rbultje) very similar to rd_inter32x32_uv(), merge?
 static void super_block_uvrd(MACROBLOCK *x,
                              int *rate,
@@ -2148,57 +2144,57 @@ static void super_block_uvrd(MACROBLOCK *x,
                           usrc, vsrc, src_uv_stride,
                           udst, vdst, dst_uv_stride);
     rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1);
-  } else {
+  } else
 #endif
-    int d = 0, r = 0, n, s = 1;
-    ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
-    ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context;
-    ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
+  {
+    int d = 0, r = 0, n, s = 1;
+    ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+    ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context;
+    ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
 
-    memcpy(t_above, xd->above_context, sizeof(t_above));
-    memcpy(t_left, xd->left_context, sizeof(t_left));
+    memcpy(t_above, xd->above_context, sizeof(t_above));
+    memcpy(t_left, xd->left_context, sizeof(t_left));
 
-    for (n = 0; n < 4; n++) {
-      int x_idx = n & 1, y_idx = n >> 1;
+    for (n = 0; n < 4; n++) {
+      int x_idx = n & 1, y_idx = n >> 1;
 
-      vp9_subtract_mbuv_s_c(x->src_diff,
-                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
-                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
-                            src_uv_stride,
-                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
-                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
-                            dst_uv_stride);
-      if (mbmi->txfm_size == TX_4X4) {
-        vp9_transform_mbuv_4x4(x);
-        vp9_quantize_mbuv_4x4(x);
-        s &= vp9_mbuv_is_skippable_4x4(xd);
-      } else {
-        vp9_transform_mbuv_8x8(x);
-        vp9_quantize_mbuv_8x8(x);
-        s &= vp9_mbuv_is_skippable_8x8(xd);
+      vp9_subtract_mbuv_s_c(x->src_diff,
+                            usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+                            src_uv_stride,
+                            udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+                            dst_uv_stride);
+      if (mbmi->txfm_size == TX_4X4) {
+        vp9_transform_mbuv_4x4(x);
+        vp9_quantize_mbuv_4x4(x);
+        s &= vp9_mbuv_is_skippable_4x4(xd);
+      } else {
+        vp9_transform_mbuv_8x8(x);
+        vp9_quantize_mbuv_8x8(x);
+        s &= vp9_mbuv_is_skippable_8x8(xd);
+      }
+
+      d += vp9_mbuverror(x) >> 2;
+      xd->above_context = t_above + x_idx;
+      xd->left_context = t_left + y_idx;
+      if (mbmi->txfm_size == TX_4X4) {
+        r += rd_cost_mbuv_4x4(x, 0);
+      } else {
+        r += rd_cost_mbuv_8x8(x, 0);
+      }
     }
 
-      d += vp9_mbuverror(x) >> 2;
-      xd->above_context = t_above + x_idx;
-      xd->left_context = t_left + y_idx;
-      if (mbmi->txfm_size == TX_4X4) {
-        r += rd_cost_mbuv_4x4(x, 0);
-      } else {
-        r += rd_cost_mbuv_8x8(x, 0);
-      }
-    }
+    xd->above_context = ta_orig;
+    xd->left_context = tl_orig;
 
-    xd->above_context = ta_orig;
-    xd->left_context = tl_orig;
-
-    *distortion = d;
-    *rate = r;
-    *skippable = s;
-#if CONFIG_TX32X32
+    *distortion = d;
+    *rate = r;
+    *skippable = s;
   }
-#endif
 }
 
+#if CONFIG_SUPERBLOCKS64
 static void super_block_64_uvrd(MACROBLOCK *x,
                                 int *rate,
                                 int *distortion,
@@ -2239,8 +2235,9 @@ static void super_block_64_uvrd(MACROBLOCK *x,
         d += d_tmp;
         s = s && s_tmp;
       }
-    } else {
+    } else
 #endif
+    {
       for (n = 0; n < 16; n++) {
         int x_idx = n & 3, y_idx = n >> 2;
@@ -2270,9 +2267,7 @@ static void
super_block_64_uvrd(MACROBLOCK *x, r += rd_cost_mbuv_8x8(x, 0); } } -#if CONFIG_TX32X32 } -#endif *distortion = d; *rate = r; @@ -2281,6 +2276,7 @@ static void super_block_64_uvrd(MACROBLOCK *x, xd->left_context = tl_orig; xd->above_context = ta_orig; } +#endif // CONFIG_SUPERBLOCKS64 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -2357,7 +2353,6 @@ static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, return best_rd; } #endif // CONFIG_SUPERBLOCKS64 -#endif int vp9_cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE m, @@ -3651,7 +3646,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } #endif -#if CONFIG_SUPERBLOCKS #if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { vp9_build_inter64x64_predictors_sb(xd, @@ -3669,9 +3663,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } else -#endif // CONFIG_SUPERBLOCKS - { + } else { assert(block_size == BLOCK_16X16); vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); if (is_comp_pred) @@ -3693,7 +3685,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (threshold < x->encode_breakout) threshold = x->encode_breakout; -#if CONFIG_SUPERBLOCKS #if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { var = vp9_variance64x64(*(b->base_src), b->src_stride, @@ -3703,9 +3694,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (block_size == BLOCK_32X32) { var = vp9_variance32x32(*(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); - } else -#endif // CONFIG_SUPERBLOCK - { + } else { assert(block_size == BLOCK_16X16); var = vp9_variance16x16(*(b->base_src), b->src_stride, xd->predictor, 16, &sse); @@ -3720,7 +3709,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Check u and v to make sure skip is ok int sse2; -#if CONFIG_SUPERBLOCKS #if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { unsigned int sse2u, sse2v; @@ -3738,9 +3726,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); sse2 = sse2u + sse2v; - } else -#endif // CONFIG_SUPERBLOCKS - { + } else { assert(block_size == BLOCK_16X16); sse2 = vp9_uvsse(x); } @@ -3773,7 +3759,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (!x->skip) { -#if CONFIG_SUPERBLOCKS #if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { int skippable_y, skippable_uv; @@ -3807,9 +3792,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += *rate_uv; *distortion += *distortion_uv; *skippable = skippable_y && skippable_uv; - } else -#endif // CONFIG_SUPERBLOCKS - { + } else { assert(block_size == BLOCK_16X16); vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256], @@ -4652,7 +4635,6 @@ end: best_pred_diff, best_txfm_diff); } -#if CONFIG_SUPERBLOCKS void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist) { @@ -4711,8 +4693,7 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, *returndist = dist_y + (dist_uv >> 2); } } -#endif -#endif +#endif // CONFIG_SUPERBLOCKS64 void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist) { @@ -4870,7 +4851,6 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, *returndist = dist; } -#if CONFIG_SUPERBLOCKS static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int 
recon_uvoffset, int *returnrate, @@ -4920,7 +4900,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_TX32X32 int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0; int dist_uv_16x16 = 0, uv_skip_16x16 = 0; - MB_PREDICTION_MODE mode_uv_16x16; + MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV; #endif x->skip = 0; @@ -5474,7 +5454,6 @@ int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, returnrate, returndistortion, BLOCK_64X64); } #endif // CONFIG_SUPERBLOCKS64 -#endif void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, int recon_yoffset, diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 19529fcbe..2ef7be2a3 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -221,7 +221,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { mi = mi_ptr; for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) { -#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 +#if CONFIG_SUPERBLOCKS64 if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, 4, mb_row, mb_col); @@ -230,23 +230,18 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { { for (i = 0; i < 4; i++) { int x_idx = (i & 1) << 1, y_idx = i & 2; -#if CONFIG_SUPERBLOCKS MODE_INFO *sb_mi = mi + y_idx * mis + x_idx; -#endif if (mb_col + x_idx >= cm->mb_cols || mb_row + y_idx >= cm->mb_rows) { continue; } -#if CONFIG_SUPERBLOCKS if (sb_mi->mbmi.sb_type) { assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32); count_segs(cpi, sb_mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, 2, mb_row + y_idx, mb_col + x_idx); - } else -#endif - { + } else { int j; for (j = 0; j < 4; j++) { @@ -258,9 +253,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { continue; } -#if CONFIG_SUPERBLOCKS assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16); -#endif count_segs(cpi, mb_mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, 1, mb_row + y_idx_mb, mb_col + x_idx_mb); diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 7a364b3e8..dda81c838 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -141,7 +141,7 @@ static void tokenize_b(VP9_COMP *cpi, vp9_block2left[tx_size][ib]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + vp9_block2above[tx_size][ib]; ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + @@ -195,7 +195,7 @@ static void tokenize_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 } else { a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; @@ -212,14 +212,14 @@ static void tokenize_b(VP9_COMP *cpi, counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; } -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 if (type == PLANE_TYPE_UV) { int uv_idx = (ib - 16) >> 2; qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx; } #endif break; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 case TX_32X32: #if CONFIG_CNVCONTEXT a_ec = a[0] + a[1] + a[2] + a[3] + @@ -294,13 +294,13 @@ static void tokenize_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a[1] = 
a[2] = a[3] = a_ec; l[1] = l[2] = l[3] = l_ec; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 } else { a1[0] = a1[1] = a[1] = a_ec; l1[0] = l1[1] = l[1] = l_ec; #endif } -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 } else if (tx_size == TX_32X32) { a[1] = a[2] = a[3] = a_ec; l[1] = l[2] = l[3] = l_ec; @@ -378,7 +378,7 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); } -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { int skip = 1; skip &= !xd->block[0].eob; @@ -768,7 +768,7 @@ static __inline void stuff_b(VP9_COMP *cpi, ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + vp9_block2above[tx_size][ib]; ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + @@ -808,7 +808,7 @@ static __inline void stuff_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 } else { a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; @@ -824,7 +824,7 @@ static __inline void stuff_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_16x16; } break; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 case TX_32X32: #if CONFIG_CNVCONTEXT a_ec = a[0] + a[1] + a[2] + a[3] + @@ -857,13 +857,13 @@ static __inline void stuff_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a[1] = a[2] = a[3] = 0; l[1] = l[2] = l[3] = 0; -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 } else { a1[0] = a1[1] = a[1] = a_ec; l1[0] = l1[1] = l[1] = l_ec; #endif } -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 } else if (tx_size == TX_32X32) { a[1] = a[2] = a[3] = a_ec; l[1] = l[2] = l[3] = l_ec; @@ -983,7 +983,7 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } } -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { int b; @@ -1005,7 +1005,7 @@ void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } #endif -#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS +#if CONFIG_TX32X32 void vp9_fix_contexts_sb(MACROBLOCKD *xd) { vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index dffd294dd..9bc756e82 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -36,7 +36,7 @@ extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block); extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); extern int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd); #endif @@ -50,12 +50,12 @@ extern void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 
extern void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); #endif -#if CONFIG_SUPERBLOCKS && CONFIG_TX32X32 +#if CONFIG_TX32X32 extern void vp9_fix_contexts_sb(MACROBLOCKD *xd); #endif #ifdef ENTROPY_STATS diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index 9060d4c74..4eee6ae56 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -24,7 +24,7 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { return sum; } -#if CONFIG_SUPERBLOCKS +#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -37,6 +37,7 @@ unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, *sse = var; return (var - (((int64_t)avg * avg) >> 12)); } +#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, @@ -50,7 +51,6 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, *sse = var; return (var - (((int64_t)avg * avg) >> 10)); } -#endif unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, @@ -197,7 +197,7 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } -#if CONFIG_SUPERBLOCKS +#if CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -218,6 +218,7 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); } +#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, @@ -238,7 +239,6 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse); } -#endif unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, @@ -249,7 +249,6 @@ unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#if CONFIG_SUPERBLOCKS unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -259,6 +258,7 @@ unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } +#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -267,7 +267,7 @@ unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, ref_ptr, recon_stride, sse); } -#endif +#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, @@ -279,7 +279,6 @@ unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#if CONFIG_SUPERBLOCKS unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -289,6 +288,7 @@ unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } +#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -297,7 +297,8 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, ref_ptr, recon_stride, sse); } -#endif +#endif // #if CONFIG_SUPERBLOCKS64 + 
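(Aside on the variance helpers being re-guarded in this hunk: each vp9_varianceNxN_c accumulates the signed difference sum and the squared-difference sum over the block, then returns sse minus the squared mean; the division by N*N is folded into a shift, >> 12 for 64x64 because 64*64 = 1 << 12, and >> 10 for 32x32. A minimal self-contained sketch of that pattern follows; the function name is hypothetical, not a libvpx symbol.)

#include <stdint.h>

/* Sketch of the NxN variance pattern used above: variance = SSE - N^2 * mean^2,
 * computed as sse - (sum * sum) / (n * n); n * n is a power of two, so the
 * real code uses a shift (e.g. >> 12 when n == 64). Illustrative only. */
static unsigned int variance_nxn_sketch(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride,
                                        int n, unsigned int *sse) {
  int i, j;
  int64_t sum = 0, sse64 = 0;
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n; ++j) {
      const int diff = src[j] - ref[j];
      sum += diff;               /* signed sum of differences */
      sse64 += diff * diff;      /* sum of squared differences */
    }
    src += src_stride;
    ref += ref_stride;
  }
  *sse = (unsigned int)sse64;
  return (unsigned int)(sse64 - ((sum * sum) / (n * n)));
}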
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, @@ -308,7 +309,6 @@ unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#if CONFIG_SUPERBLOCKS unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -318,6 +318,7 @@ unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } +#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -326,7 +327,7 @@ unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#endif +#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, int src_pixels_per_line, @@ -341,7 +342,6 @@ unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, return *sse; } -#if CONFIG_SUPERBLOCKS unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -355,6 +355,7 @@ unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, return *sse; } +#if CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -367,7 +368,7 @@ unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, dst_pixels_per_line, sse); return *sse; } -#endif +#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int src_pixels_per_line,

From 4b7304ee68dc28f96d73ff4ff1894de833d54f1a Mon Sep 17 00:00:00 2001
From: Deb Mukherjee
Date: Tue, 8 Jan 2013 12:18:16 -0800
Subject: [PATCH 53/77] Adds 64x64 hybrid dct/dwt transform

This extends the 64x64 transform experiment with an alternative to a full
64x64 DCT. Two levels of wavelet decomposition are applied to a 64x64 block,
followed by a 16x16 DCT on each of the four lowest subbands. The three
highest subbands are left untransformed after the first-level DWT.
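To make the subband geometry in that description concrete, here is a hedged sketch of where the forward hybrid transform leaves its coefficients. This is a reading aid, not the patch's code; dwt2d_analyze_sketch and fdct16x16_sketch are placeholder names standing in for the dyadic_analyze_* and 16x16 DCT routines below. Two analysis levels put the 32x32 low band in the top-left corner, its four 16x16 quadrants are then DCT-coded, and the three level-1 high bands (the top-right 32x32 block plus the bottom 32-row half) stay in the wavelet domain:

#include <stdint.h>

/* Placeholder prototypes, stand-ins for the patch's dyadic_analyze_* and
 * vp9_short_fdct16x16_c_f helpers. */
void dwt2d_analyze_sketch(int16_t *buf, int stride, int levels);
void fdct16x16_sketch(int16_t *buf, int stride);

/* Coefficient layout after the 64x64 hybrid transform:
 *   rows  0..31, cols  0..31  -> level-2 subbands, coded as four 16x16 DCTs
 *   rows  0..31, cols 32..63  -> level-1 HL band, left as DWT output
 *   rows 32..63, cols  0..63  -> level-1 LH and HH bands, left as DWT output
 */
static void hybrid_fdct64x64_sketch(int16_t *block) {  /* 64x64, stride 64 */
  int i;
  dwt2d_analyze_sketch(block, 64, 2);   /* two dyadic analysis levels */
  for (i = 0; i < 4; ++i) {             /* DCT the low-band quadrants */
    const int row = (i >> 1) * 16, col = (i & 1) * 16;
    fdct16x16_sketch(block + row * 64 + col, 64);
  }
  /* The remaining high bands are only rescaled, never DCT-coded. */
}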
Change-Id: I3d48d5800468d655191933894df6b46e15adca56 --- configure | 3 +- vp9/common/vp9_entropy.c | 4 +- vp9/common/vp9_idctllm.c | 110 +++++++++++++++++++++++++++++++++------ vp9/encoder/vp9_dct.c | 83 +++++++++++++++++++++++++---- vp9/encoder/vp9_rdopt.c | 6 +-- 5 files changed, 175 insertions(+), 31 deletions(-) diff --git a/configure b/configure index 57a614596..396cee71b 100755 --- a/configure +++ b/configure @@ -249,7 +249,8 @@ EXPERIMENT_LIST=" newbintramodes comp_interintra_pred tx32x32 - dwt32x32hybrid + tx64x64 + dwtdcthybrid cnvcontext newcoefcontext " diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index cdc8bc14e..bc8738411 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -143,7 +143,7 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { }; #if CONFIG_TX32X32 -#if CONFIG_DWT32X32HYBRID +#if CONFIG_DWTDCTHYBRID DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 6, @@ -458,7 +458,7 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, }; -#endif // CONFIG_DWT32X32HYBRID +#endif // CONFIG_DWTDCTHYBRID #endif /* Array indices are identical to previously-existing CONTEXT_NODE indices */ diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 4dd540e2a..baa22457f 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -1534,7 +1534,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { #endif #if CONFIG_TX32X32 -#if !CONFIG_DWT32X32HYBRID +#if !CONFIG_DWTDCTHYBRID #define DownshiftMultiplyBy2(x) x * 2 #define DownshiftMultiply(x) x static void idct16(double *input, double *output, int stride) { @@ -1879,7 +1879,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -#else // CONFIG_DWT32X32HYBRID +#else // CONFIG_DWTDCTHYBRID #define DWT_MAX_LENGTH 32 #define DWT_TYPE 26 // 26/53/97 @@ -1940,8 +1940,8 @@ static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass, *x++ = ((*b) << 1) + *a; } -void dyadic_synthesize_53(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { +static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c, + int pitch_c, int16_t *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; short buffer[2 * DWT_MAX_LENGTH]; @@ -2031,8 +2031,8 @@ static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass, } } -void dyadic_synthesize_26(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { +static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c, + int pitch_c, int16_t *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; int16_t buffer[2 * DWT_MAX_LENGTH]; @@ -2111,8 +2111,8 @@ static void synthesis_97(int length, double *lowpass, double *highpass, x[length - 1] -= 2 * a_predict1 * x[length - 2]; } -void dyadic_synthesize_97(int levels, int width, int height, int16_t *c, - int pitch_c, int16_t *x, int pitch_x) { +static void dyadic_synthesize_97(int levels, int 
width, int height, int16_t *c, + int pitch_c, int16_t *x, int pitch_x) { int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width; double buffer[2 * DWT_MAX_LENGTH]; double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH]; @@ -2358,7 +2358,8 @@ static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch) { +static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch, + int scale) { vp9_clear_system_state(); // Make it simd safe : __asm emms; { double out[16*16], out2[16*16]; @@ -2383,13 +2384,13 @@ void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch) { out2[j*16 + i] = temp_out[j]; } for (i = 0; i < 16*16; ++i) - output[i] = round(out2[i] / (64 >> DWT_PRECISION_BITS)); + output[i] = round(out2[i] / (128 >> scale)); } vp9_clear_system_state(); // Make it simd safe : __asm emms; } void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { - // assume out is a 32x32 buffer + // assume output is a 32x32 buffer // Temporary buffer to hold a 16x16 block for 16x16 inverse dct int16_t buffer[16 * 16]; // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt @@ -2400,20 +2401,24 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { // TODO(debargha): Implement more efficiently by adding output pitch // argument to the idct16x16 function - vp9_short_idct16x16_c_f(input, buffer, pitch); + vp9_short_idct16x16_c_f(input, buffer, pitch, + 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) { vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); } - vp9_short_idct16x16_c_f(input + 16, buffer, pitch); + vp9_short_idct16x16_c_f(input + 16, buffer, pitch, + 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) { vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16); } - vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch); + vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch, + 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) { vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16, sizeof(*buffer2) * 16); } - vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch); + vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch, + 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) { vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16, sizeof(*buffer2) * 16); @@ -2426,5 +2431,78 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); #endif } -#endif // CONFIG_DWT32X32HYBRID + +void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { + // assume output is a 64x64 buffer + // Temporary buffer to hold a 16x16 block for 16x16 inverse dct + int16_t buffer[16 * 16]; + // Temporary buffer to hold a 64x64 block for inverse 64x64 dwt + int16_t buffer2[64 * 64]; + // Note: pitch is in bytes, short_pitch is in short units + const int short_pitch = pitch >> 1; + int i, j; + + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the idct16x16 function + vp9_short_idct16x16_c_f(input, buffer, pitch, + 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16); + } + vp9_short_idct16x16_c_f(input + 16, buffer, pitch, + 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16,
sizeof(*buffer2) * 16); + } + vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch, + 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16, + sizeof(*buffer2) * 16); + } + vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch, + 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16, + sizeof(*buffer2) * 16); + } + + // Copying and scaling highest bands into buffer2 +#if DWT_PRECISION_BITS < 1 + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + buffer2[i * 64 + 32 + j] = + input[i * short_pitch + 32 + j] >> (1 - DWT_PRECISION_BITS); + } + } + for (i = 0; i < 32; ++i) { + for (j = 0; j < 64; ++j) { + buffer2[i * 64 + j] = + input[(i + 32) * short_pitch + j] >> (1 - DWT_PRECISION_BITS); + } + } +#else + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + buffer2[i * 64 + 32 + j] = + input[i * short_pitch + 32 + j] << (DWT_PRECISION_BITS - 1); + } + } + for (i = 0; i < 32; ++i) { + for (j = 0; j < 64; ++j) { + buffer2[i * 64 + j] = + input[(i + 32) * short_pitch + j] << (DWT_PRECISION_BITS - 1); + } + } +#endif + +#if DWT_TYPE == 26 + dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64); +#elif DWT_TYPE == 97 + dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64); +#elif DWT_TYPE == 53 + dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64); +#endif +} +#endif // CONFIG_DWTDCTHYBRID #endif // CONFIG_TX32X32 diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index e14421d2d..0de6393a0 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -1332,8 +1332,9 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { #undef ROUNDING #endif +#if CONFIG_TX32X32 || CONFIG_TX64X64 +#if !CONFIG_DWTDCTHYBRID #if CONFIG_TX32X32 -#if !CONFIG_DWT32X32HYBRID static void dct32_1d(double *input, double *output, int stride) { static const double C1 = 0.998795456205; // cos(pi * 1 / 64) static const double C2 = 0.995184726672; // cos(pi * 2 / 64) @@ -1684,8 +1685,9 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } +#endif // CONFIG_TX32X32 -#else // CONFIG_DWT32X32HYBRID +#else // CONFIG_DWTDCTHYBRID #define DWT_MAX_LENGTH 64 #define DWT_TYPE 26 // 26/53/97 @@ -2108,7 +2110,8 @@ static void dct16x16_1d_f(double input[16], double output[16]) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch) { +static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, + int scale) { vp9_clear_system_state(); // Make it simd safe : __asm emms; { int shortpitch = pitch >> 1; @@ -2134,11 +2137,12 @@ void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch) { } // Scale by some magic number for (i = 0; i < 256; i++) - out[i] = (short)round(output[i] / (4 << DWT_PRECISION_BITS)); + out[i] = (short)round(output[i] / (2 << scale)); } vp9_clear_system_state(); // Make it simd safe : __asm emms; } +#if CONFIG_TX32X32 void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { // assume out is a 32x32 buffer short buffer[16 * 16]; @@ -2153,21 +2157,82 @@ void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { #endif // TODO(debargha): Implement more efficiently by adding output pitch // argument to the dct16x16 function - vp9_short_fdct16x16_c_f(out, buffer, 64); + vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); for (i = 0; i 
< 16; ++i) vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); - vp9_short_fdct16x16_c_f(out + 16, buffer, 64); + vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16); - vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64); + vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16); - vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64); + vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16); } -#endif // CONFIG_DWT32X32HYBRID #endif // CONFIG_TX32X32 + +#if CONFIG_TX64X64 +void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { + // assume out is a 64x64 buffer + short buffer[16 * 16]; + int i, j; + const int short_pitch = pitch >> 1; +#if DWT_TYPE == 26 + dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64); +#elif DWT_TYPE == 97 + dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64); +#elif DWT_TYPE == 53 + dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64); +#endif + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the dct16x16 function + vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16); + + vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16); + + vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16); + + vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16); + + // There is no dct used on the highest bands for now. 
+ // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS + // TODO(debargha): experiment with turning these coeffs to 0 +#if DWT_PRECISION_BITS < 1 + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + out[i * 64 + 32 + j] <<= (1 - DWT_PRECISION_BITS); + } + } + for (i = 0; i < 32; ++i) { + for (j = 0; j < 64; ++j) { + out[(i + 32) * 64 + j] <<= (1 - DWT_PRECISION_BITS); + } + } +#else + for (i = 0; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + out[i * 64 + 32 + j] >>= (DWT_PRECISION_BITS - 1); + } + } + for (i = 0; i < 32; ++i) { + for (j = 0; j < 64; ++j) { + out[(i + 32) * 64 + j] >>= (DWT_PRECISION_BITS - 1); + } + } +#endif +} +#endif // CONFIG_TX64X64 +#endif // CONFIG_DWTDCTHYBRID +#endif // CONFIG_TX32X32 || CONFIG_TX64X64 diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c695c049a..956d8f90c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -965,17 +965,17 @@ static void super_block_yrd_32x32(MACROBLOCK *x, SUPERBLOCK * const x_sb = &x->sb_coeff_data; MACROBLOCKD * const xd = &x->e_mbd; SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; -#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID +#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID int16_t out[1024]; #endif vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); -#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID +#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID vp9_short_idct32x32(xd_sb->dqcoeff, out, 64); #endif -#if !CONFIG_DWT32X32HYBRID +#if !CONFIG_DWTDCTHYBRID *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024); #else *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4;

From 7d6b5425d704c91c6d048c1a96542c9cd027f015 Mon Sep 17 00:00:00 2001
From: Adrian Grange
Date: Tue, 8 Jan 2013 14:14:01 -0800
Subject: [PATCH 54/77] New prediction filter

This patch removes the old pred-filter experiment and replaces it with
one implemented using the switchable filter framework. If the
pred-filter experiment is enabled, three interpolation filters are
tested during mode selection: the standard 8-tap interpolation filter,
a sharp 8-tap filter, and a (new) 8-tap smoothing filter.

The 6-tap filter code has been preserved for now, and if the
enable-6tap experiment is enabled (in addition to the pred-filter
experiment), the original 6-tap filter replaces the new 8-tap smooth
filter in the switchable mode.

The new experiment applies the prediction filter in cases of a
fractional-pel motion vector. Future patches will apply the filter
where the mv is pel-aligned and also to intra predicted blocks.
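For context on what these switchable 8-tap kernels do at a single pixel: every row of vp9_sub_pel_filters_8, _8s, and the new _8lp table below sums to 128, so a separable prediction applies the taps horizontally, then vertically, normalizing each pass with a rounded shift by 7. A minimal one-pixel sketch under those assumptions follows; the helper name is illustrative, not libvpx API:

#include <stdint.h>

/* One horizontal tap application at a single position. `src` points at the
 * sample just left of center, so the 8 taps cover src[-3] .. src[4]; the
 * taps sum to 128, hence the rounded shift by 7. Illustrative only. */
static uint8_t apply_8tap_sketch(const uint8_t *src, const int16_t *taps) {
  int k, sum = 0;
  for (k = 0; k < 8; ++k)
    sum += taps[k] * src[k - 3];
  sum = (sum + 64) >> 7;   /* round and remove the 128x filter gain */
  if (sum < 0) sum = 0;    /* negative lobes can undershoot/overshoot */
  if (sum > 255) sum = 255;
  return (uint8_t)sum;
}

The two-pass structure is visible in the filter_block2d_* helpers in this patch: a first pass writes horizontally filtered rows into an intermediate buffer tall enough for the vertical taps, and a second pass runs the same kernel down the columns.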
Change-Id: I08e8cba978f2bbf3019f8413f376b8e2cd85eba4 --- configure | 1 + vp9/common/vp9_blockd.h | 19 +- vp9/common/vp9_entropymode.c | 16 +- vp9/common/vp9_entropymode.h | 4 + vp9/common/vp9_filter.c | 338 ++++++++++++++++++++++------------ vp9/common/vp9_filter.h | 1 + vp9/common/vp9_onyxc_int.h | 7 - vp9/common/vp9_reconinter.c | 305 +++++------------------------- vp9/common/vp9_rtcd_defs.sh | 37 +++- vp9/decoder/vp9_decodemv.c | 17 -- vp9/encoder/vp9_bitstream.c | 20 -- vp9/encoder/vp9_encodeframe.c | 10 - vp9/encoder/vp9_encodemb.c | 5 - vp9/encoder/vp9_encodemb.h | 3 - vp9/encoder/vp9_mbgraph.c | 5 - vp9/encoder/vp9_onyx_if.c | 214 ++------------------- vp9/encoder/vp9_onyx_int.h | 101 ---------- vp9/encoder/vp9_rdopt.c | 128 +------------ 18 files changed, 338 insertions(+), 893 deletions(-) diff --git a/configure b/configure index 396cee71b..0a5825194 100755 --- a/configure +++ b/configure @@ -253,6 +253,7 @@ EXPERIMENT_LIST=" dwtdcthybrid cnvcontext newcoefcontext + enable_6tap " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 9f6e53356..26f4a2ff1 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -76,10 +76,13 @@ typedef enum { typedef enum { - SIXTAP = 0, - BILINEAR = 1, - EIGHTTAP = 2, - EIGHTTAP_SHARP = 3, +#if CONFIG_ENABLE_6TAP + SIXTAP, +#endif + EIGHTTAP_SMOOTH, + EIGHTTAP, + EIGHTTAP_SHARP, + BILINEAR, SWITCHABLE /* should be the last one */ } INTERPOLATIONFILTERTYPE; @@ -268,10 +271,6 @@ typedef struct { // a valid predictor unsigned char mb_in_image; -#if CONFIG_PRED_FILTER - // Flag to turn prediction signal filter on(1)/off(0 ) at the MB level - unsigned int pred_filter_enabled; -#endif INTERPOLATIONFILTERTYPE interp_filter; BLOCK_SIZE_TYPE sb_type; @@ -399,11 +398,11 @@ typedef struct macroblockd { void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out); - vp9_subpix_fn_t subpixel_predict; + vp9_subpix_fn_t subpixel_predict4x4; vp9_subpix_fn_t subpixel_predict8x4; vp9_subpix_fn_t subpixel_predict8x8; vp9_subpix_fn_t subpixel_predict16x16; - vp9_subpix_fn_t subpixel_predict_avg; + vp9_subpix_fn_t subpixel_predict_avg4x4; vp9_subpix_fn_t subpixel_predict_avg8x4; vp9_subpix_fn_t subpixel_predict_avg8x8; vp9_subpix_fn_t subpixel_predict_avg16x16; diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index d7c7a3f66..ecae5e057 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -388,9 +388,15 @@ const vp9_tree_index vp9_switchable_interp_tree[VP9_SWITCHABLE_FILTERS*2-2] = { -1, -2 }; struct vp9_token_struct vp9_switchable_interp_encodings[VP9_SWITCHABLE_FILTERS]; +#if CONFIG_ENABLE_6TAP const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { - EIGHTTAP, SIXTAP, EIGHTTAP_SHARP}; -const int vp9_switchable_interp_map[SWITCHABLE+1] = {1, -1, 0, 2, -1}; + SIXTAP, EIGHTTAP, EIGHTTAP_SHARP}; +const int vp9_switchable_interp_map[SWITCHABLE+1] = {0, -1, 1, 2, -1, -1}; +#else +const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { + EIGHTTAP, EIGHTTAP_SMOOTH, EIGHTTAP_SHARP}; +const int vp9_switchable_interp_map[SWITCHABLE+1] = {1, 0, 2, -1, -1}; +#endif const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] [VP9_SWITCHABLE_FILTERS-1] = { {248, 192}, { 32, 248}, { 32, 32}, {192, 160} @@ -408,7 +414,11 @@ const vp9_prob vp9_switchable_interp_prob [VP9_SWITCHABLE_FILTERS+1] }; const INTERPOLATIONFILTERTYPE vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] = { EIGHTTAP, EIGHTTAP_SHARP}; -const int 
vp9_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1}; //8, 8s +#if CONFIG_ENABLE_6TAP +const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, -1, 0, 1, -1, -1}; +#else +const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, 0, 1, -1, -1}; +#endif #endif void vp9_entropy_mode_init() { diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index fe3ace6bc..439fb3f43 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -97,7 +97,11 @@ void vp9_kf_default_bmode_probs(vp9_prob dest[VP9_KF_BINTRAMODES] void vp9_adapt_mode_probs(struct VP9Common *); +#if CONFIG_PRED_FILTER +#define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */ +#else #define VP9_SWITCHABLE_FILTERS 2 /* number of switchable filters */ +#endif extern const INTERPOLATIONFILTERTYPE vp9_switchable_interp [VP9_SWITCHABLE_FILTERS]; diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 2adbfe137..07d8a169f 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -122,6 +122,28 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = { #endif /* FILTER_ALPHA_SHARP */ }; +DECLARE_ALIGNED(16, const int16_t, + vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = { + /* 8-tap lowpass filter */ + /* Hamming window */ + {-1, -7, 32, 80, 32, -7, -1, 0}, + {-1, -8, 28, 80, 37, -7, -2, 1}, + { 0, -8, 24, 79, 41, -7, -2, 1}, + { 0, -8, 20, 78, 45, -5, -3, 1}, + { 0, -8, 16, 76, 50, -4, -3, 1}, + { 0, -7, 13, 74, 54, -3, -4, 1}, + { 1, -7, 9, 71, 58, -1, -4, 1}, + { 1, -6, 6, 68, 62, 1, -5, 1}, + { 1, -6, 4, 65, 65, 4, -6, 1}, + { 1, -5, 1, 62, 68, 6, -6, 1}, + { 1, -4, -1, 58, 71, 9, -7, 1}, + { 1, -4, -3, 54, 74, 13, -7, 0}, + { 1, -3, -4, 50, 76, 16, -8, 0}, + { 1, -3, -5, 45, 78, 20, -8, 0}, + { 1, -2, -7, 41, 79, 24, -8, 0}, + { 1, -2, -7, 37, 80, 28, -8, -1} +}; + DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = { {0, 0, 128, 0, 0, 0}, {1, -5, 125, 8, -2, 1}, @@ -254,30 +276,33 @@ static void filter_block2d_6(uint8_t *src_ptr, int output_pitch, const int16_t *HFilter, const int16_t *VFilter) { - int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer used in filtering */ + int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1, - 3 + Interp_Extend * 2, 4, HFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 3 + Interp_Extend * 2, 4, HFilter); - /* then filter verticaly... */ - filter_block2d_second_pass_6(FData + 4 * (Interp_Extend - 1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter); + /* then filter vertically... 
*/ + filter_block2d_second_pass_6(FData + 4 * (Interp_Extend - 1), output_ptr, + output_pitch, 4, 4, 4, 4, VFilter); } -void vp9_sixtap_predict_c(uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - uint8_t *dst_ptr, - int dst_pitch) { +void vp9_sixtap_predict4x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ - filter_block2d_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); + filter_block2d_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, + VFilter); } /* @@ -293,32 +318,32 @@ static void filter_block2d_avg_6(uint8_t *src_ptr, int output_pitch, const int16_t *HFilter, const int16_t *VFilter) { - int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer used in filtering */ + int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), - FData, src_pixels_per_line, 1, - 3 + Interp_Extend * 2, 4, HFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 3 + Interp_Extend * 2, 4, HFilter); - /* then filter verticaly... */ + /* then filter vertically... */ filter_block2d_second_pass_avg_6(FData + 4 * (Interp_Extend - 1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter); } -void vp9_sixtap_predict_avg_c(uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - uint8_t *dst_ptr, - int dst_pitch) { +void vp9_sixtap_predict_avg4x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ - filter_block2d_avg_6(src_ptr, dst_ptr, src_pixels_per_line, - dst_pitch, HFilter, VFilter); + filter_block2d_avg_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, + HFilter, VFilter); } void vp9_sixtap_predict8x8_c(uint8_t *src_ptr, @@ -329,19 +354,19 @@ void vp9_sixtap_predict8x8_c(uint8_t *src_ptr, int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; - // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */ - int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */ + int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer */ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1, - 7 + Interp_Extend * 2, 8, HFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 7 + Interp_Extend * 2, 8, HFilter); - - /* then filter verticaly... */ - filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); + /* then filter vertically... 
*/ + filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, + dst_pitch, 8, 8, 8, 8, VFilter); } @@ -353,18 +378,19 @@ void vp9_sixtap_predict_avg8x8_c(uint8_t *src_ptr, int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; - // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */ - int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */ + int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer */ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1, - 7 + Interp_Extend * 2, 8, HFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 7 + Interp_Extend * 2, 8, HFilter); - /* then filter verticaly... */ - filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); + /* then filter vertically... */ + filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr, + dst_pitch, 8, 8, 8, 8, VFilter); } void vp9_sixtap_predict8x4_c(uint8_t *src_ptr, @@ -375,20 +401,19 @@ void vp9_sixtap_predict8x4_c(uint8_t *src_ptr, int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; - // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */ - int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */ + int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer */ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1, - 3 + Interp_Extend * 2, 8, HFilter); - - - /* then filter verticaly... */ - filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 3 + Interp_Extend * 2, 8, HFilter); + /* then filter vertically... */ + filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, + dst_pitch, 8, 8, 4, 8, VFilter); } void vp9_sixtap_predict16x16_c(uint8_t *src_ptr, @@ -399,20 +424,19 @@ void vp9_sixtap_predict16x16_c(uint8_t *src_ptr, int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; - // int FData[(15+Interp_Extend*2)*24]; /* Temp data buffer used in filtering */ - int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */ - + int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer */ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1, - 15 + Interp_Extend * 2, 16, HFilter); - - /* then filter verticaly... */ - filter_block2d_second_pass_6(FData + 16 * (Interp_Extend - 1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter); + /* then filter vertically... 
*/ + filter_block2d_second_pass_6(FData + 16 * (Interp_Extend - 1), dst_ptr, + dst_pitch, 16, 16, 16, 16, VFilter); } void vp9_sixtap_predict_avg16x16_c(uint8_t *src_ptr, @@ -423,19 +447,19 @@ void vp9_sixtap_predict_avg16x16_c(uint8_t *src_ptr, int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; - // int FData[(15+Interp_Extend*2)*24]; /* Temp data buffer used in filtering */ - int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */ + int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer */ HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */ VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */ /* First filter 1-D horizontally... */ - filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, - src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter); + filter_block2d_first_pass_6( + src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, + src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter); - /* then filter verticaly... */ - filter_block2d_second_pass_avg_6(FData + 16 * (Interp_Extend - 1), dst_ptr, dst_pitch, - 16, 16, 16, 16, VFilter); + /* then filter vertically... */ + filter_block2d_second_pass_avg_6(FData + 16 * (Interp_Extend - 1), dst_ptr, + dst_pitch, 16, 16, 16, 16, VFilter); } typedef enum { @@ -543,8 +567,7 @@ void vp9_filter_block2d_4x4_8_c(const uint8_t *src_ptr, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride) { - filter_block2d_8_c(src_ptr, src_stride, - HFilter_aligned16, VFilter_aligned16, + filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_4x4, dst_ptr, dst_stride); } @@ -554,8 +577,7 @@ void vp9_filter_block2d_8x4_8_c(const uint8_t *src_ptr, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride) { - filter_block2d_8_c(src_ptr, src_stride, - HFilter_aligned16, VFilter_aligned16, + filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_8x4, dst_ptr, dst_stride); } @@ -565,8 +587,7 @@ void vp9_filter_block2d_8x8_8_c(const uint8_t *src_ptr, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride) { - filter_block2d_8_c(src_ptr, src_stride, - HFilter_aligned16, VFilter_aligned16, + filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_8x8, dst_ptr, dst_stride); } @@ -576,8 +597,7 @@ void vp9_filter_block2d_16x16_8_c(const uint8_t *src_ptr, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride) { - filter_block2d_8_c(src_ptr, src_stride, - HFilter_aligned16, VFilter_aligned16, + filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16, VPX_FILTER_16x16, dst_ptr, dst_stride); } @@ -600,20 +620,19 @@ static void block2d_average_c(uint8_t *src, #define block2d_average block2d_average_c -void vp9_eighttap_predict_c(uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - uint8_t *dst_ptr, - int dst_pitch) { +void vp9_eighttap_predict4x4_c(uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + uint8_t *dst_ptr, + int dst_pitch) { const int16_t *HFilter; const int16_t *VFilter; HFilter = vp9_sub_pel_filters_8[xoffset]; VFilter = vp9_sub_pel_filters_8[yoffset]; - vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, - HFilter, VFilter, + vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, dst_ptr, dst_pitch); } @@ -627,24 +646,39 @@ void vp9_eighttap_predict_avg4x4_c(uint8_t *src_ptr, const int16_t 
*VFilter = vp9_sub_pel_filters_8[yoffset];
   uint8_t tmp[4 * 4];
 
-  vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
-                           tmp, 4);
+  vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
+                           4);
   block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
 }
 
-void vp9_eighttap_predict_sharp_c(uint8_t *src_ptr,
-                                  int src_pixels_per_line,
-                                  int xoffset,
-                                  int yoffset,
-                                  uint8_t *dst_ptr,
-                                  int dst_pitch) {
+void vp9_eighttap_predict4x4_sharp_c(uint8_t *src_ptr,
+                                     int src_pixels_per_line,
+                                     int xoffset,
+                                     int yoffset,
+                                     uint8_t *dst_ptr,
+                                     int dst_pitch) {
   const int16_t *HFilter;
   const int16_t *VFilter;
 
   HFilter = vp9_sub_pel_filters_8s[xoffset];
   VFilter = vp9_sub_pel_filters_8s[yoffset];
 
+  vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                           dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict4x4_smooth_c(uint8_t *src_ptr,
+                                      int src_pixels_per_line,
+                                      int xoffset,
+                                      int yoffset,
+                                      uint8_t *dst_ptr,
+                                      int dst_pitch) {
+  const int16_t *HFilter;
+  const int16_t *VFilter;
+
+  HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  VFilter = vp9_sub_pel_filters_8lp[yoffset];
   vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
                            dst_ptr, dst_pitch);
@@ -660,12 +694,27 @@ void vp9_eighttap_predict_avg4x4_sharp_c(uint8_t *src_ptr,
   const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
   uint8_t tmp[4 * 4];
 
-  vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
-                           tmp, 4);
+  vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
+                           4);
   block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
 }
 
+void vp9_eighttap_predict_avg4x4_smooth_c(uint8_t *src_ptr,
+                                          int src_pixels_per_line,
+                                          int xoffset,
+                                          int yoffset,
+                                          uint8_t *dst_ptr,
+                                          int dst_pitch) {
+  const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
+  uint8_t tmp[4 * 4];
+
+  vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
+                           4);
+  block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
+}
+
+
 void vp9_eighttap_predict8x8_c(uint8_t *src_ptr,
                                int src_pixels_per_line,
                                int xoffset,
@@ -675,8 +724,7 @@ void vp9_eighttap_predict8x8_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
 
-  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
+  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
                            dst_ptr, dst_pitch);
 }
 
@@ -689,8 +737,20 @@ void vp9_eighttap_predict8x8_sharp_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
 
-  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
+  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                           dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict8x8_smooth_c(uint8_t *src_ptr,
+                                      int src_pixels_per_line,
+                                      int xoffset,
+                                      int yoffset,
+                                      uint8_t *dst_ptr,
+                                      int dst_pitch) {
+  const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
+
+  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
                            dst_ptr, dst_pitch);
 }
 
@@ -704,9 +764,8 @@ void vp9_eighttap_predict_avg8x8_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
 
-  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
-                           tmp, 8);
+  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
+                           8);
   block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
 }
 
@@ -720,9 +779,23 @@ void vp9_eighttap_predict_avg8x8_sharp_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
 
-  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
-                           tmp, 8);
+  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
+                           8);
+  block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
+}
+
+void vp9_eighttap_predict_avg8x8_smooth_c(uint8_t *src_ptr,
+                                          int src_pixels_per_line,
+                                          int xoffset,
+                                          int yoffset,
+                                          uint8_t *dst_ptr,
+                                          int dst_pitch) {
+  uint8_t tmp[8 * 8];
+  const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
+
+  vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
+                           8);
   block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
 }
 
@@ -735,8 +808,7 @@ void vp9_eighttap_predict8x4_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
 
-  vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
+  vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
                            dst_ptr, dst_pitch);
 }
 
@@ -749,8 +821,20 @@ void vp9_eighttap_predict8x4_sharp_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
 
-  vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line,
-                           HFilter, VFilter,
+  vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                           dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict8x4_smooth_c(uint8_t *src_ptr,
+                                      int src_pixels_per_line,
+                                      int xoffset,
+                                      int yoffset,
+                                      uint8_t *dst_ptr,
+                                      int dst_pitch) {
+  const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
+
+  vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
                            dst_ptr, dst_pitch);
 }
 
@@ -763,9 +847,8 @@ void vp9_eighttap_predict16x16_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
 
-  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
-                             HFilter, VFilter,
-                             dst_ptr, dst_pitch);
+  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                             dst_ptr, dst_pitch);
 }
 
 void vp9_eighttap_predict16x16_sharp_c(uint8_t *src_ptr,
@@ -777,9 +860,21 @@ void vp9_eighttap_predict16x16_sharp_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
 
-  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
-                             HFilter, VFilter,
-                             dst_ptr, dst_pitch);
+  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                             dst_ptr, dst_pitch);
+}
+
+void vp9_eighttap_predict16x16_smooth_c(uint8_t *src_ptr,
+                                        int src_pixels_per_line,
+                                        int xoffset,
+                                        int yoffset,
+                                        uint8_t *dst_ptr,
+                                        int dst_pitch) {
+  const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
+
+  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                             dst_ptr, dst_pitch);
 }
 
 void vp9_eighttap_predict_avg16x16_c(uint8_t *src_ptr,
@@ -792,9 +887,8 @@ void vp9_eighttap_predict_avg16x16_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
 
-  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
-                             HFilter, VFilter,
-                             tmp, 16);
+  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                             tmp, 16);
   block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
 }
 
@@ -808,9 +902,23 @@ void vp9_eighttap_predict_avg16x16_sharp_c(uint8_t *src_ptr,
   const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
   const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
 
-  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,
-                             HFilter, VFilter,
-                             tmp, 16);
+  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                             tmp, 16);
+  block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
+}
+
+void vp9_eighttap_predict_avg16x16_smooth_c(uint8_t *src_ptr,
+                                            int src_pixels_per_line,
+                                            int xoffset,
+                                            int yoffset,
+                                            uint8_t *dst_ptr,
+                                            int dst_pitch) {
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);
+  const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
+  const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
+
+  vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
+                             tmp, 16);
   block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
 }
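All of the `_avg` predictors above follow the same pattern: filter into a stack-local `tmp` block, then average `tmp` into the destination. The real `block2d_average` is defined earlier in vp9_filter.c and is not shown in this hunk; the sketch below is an assumed, minimal version of that averaging step (helper name hypothetical, simple round-to-nearest):

#include <stdint.h>

/* Sketch only: average a w x h prediction in pred (stride pred_stride)
 * into dst (stride dst_stride) with rounding, as the _avg predictors do.
 * average_block is a hypothetical stand-in for block2d_average(). */
static void average_block(const uint8_t *pred, int pred_stride,
                          uint8_t *dst, int dst_stride, int w, int h) {
  int r, c;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c)
      dst[c] = (dst[c] + pred[c] + 1) >> 1;  /* rounded average */
    pred += pred_stride;
    dst += dst_stride;
  }
}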
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 807a6b2ec..cd666578d 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -25,5 +25,6 @@ extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][2];
 extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6];
 extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8];
 extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8];
 
 #endif  // VP9_COMMON_VP9_FILTER_H_
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index bdff48bf3..440125f91 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -278,13 +278,6 @@ typedef struct VP9Common {
   struct postproc_state postproc_state;
 #endif
 
-#if CONFIG_PRED_FILTER
-  /* Prediction filter variables */
-  int pred_filter_mode;  // 0=disabled at the frame level (no MB filtered)
-                         // 1=enabled at the frame level (all MB filtered)
-                         // 2=specified per MB (1=filtered, 0=non-filtered)
-  vp9_prob prob_pred_filter_off;
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
   int use_interintra;
 #endif
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 7a7283183..e6561128c 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -18,39 +18,53 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd,
                               INTERPOLATIONFILTERTYPE mcomp_filter_type,
                               VP9_COMMON *cm) {
+#if CONFIG_ENABLE_6TAP
   if (mcomp_filter_type == SIXTAP) {
-    xd->subpixel_predict         = vp9_sixtap_predict;
+    xd->subpixel_predict4x4      = vp9_sixtap_predict4x4;
     xd->subpixel_predict8x4      = vp9_sixtap_predict8x4;
     xd->subpixel_predict8x8      = vp9_sixtap_predict8x8;
     xd->subpixel_predict16x16    = vp9_sixtap_predict16x16;
-    xd->subpixel_predict_avg     = vp9_sixtap_predict_avg;
+    xd->subpixel_predict_avg4x4  = vp9_sixtap_predict_avg4x4;
     xd->subpixel_predict_avg8x8  = vp9_sixtap_predict_avg8x8;
     xd->subpixel_predict_avg16x16 = vp9_sixtap_predict_avg16x16;
-  } else if (mcomp_filter_type == EIGHTTAP || mcomp_filter_type == SWITCHABLE) {
-    xd->subpixel_predict         = vp9_eighttap_predict;
+  } else {
+#endif
+  if (mcomp_filter_type == EIGHTTAP || mcomp_filter_type == SWITCHABLE) {
+    xd->subpixel_predict4x4      = vp9_eighttap_predict4x4;
     xd->subpixel_predict8x4      = vp9_eighttap_predict8x4;
     xd->subpixel_predict8x8      = vp9_eighttap_predict8x8;
     xd->subpixel_predict16x16    = vp9_eighttap_predict16x16;
-    xd->subpixel_predict_avg     = vp9_eighttap_predict_avg4x4;
+    xd->subpixel_predict_avg4x4  = vp9_eighttap_predict_avg4x4;
     xd->subpixel_predict_avg8x8  = vp9_eighttap_predict_avg8x8;
     xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16;
+  } else if (mcomp_filter_type == EIGHTTAP_SMOOTH) {
+    xd->subpixel_predict4x4      = vp9_eighttap_predict4x4_smooth;
+    xd->subpixel_predict8x4      = vp9_eighttap_predict8x4_smooth;
+    xd->subpixel_predict8x8      = vp9_eighttap_predict8x8_smooth;
+    xd->subpixel_predict16x16    = vp9_eighttap_predict16x16_smooth;
+    xd->subpixel_predict_avg4x4  = vp9_eighttap_predict_avg4x4_smooth;
+    xd->subpixel_predict_avg8x8  = vp9_eighttap_predict_avg8x8_smooth;
+    xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16_smooth;
   } else if (mcomp_filter_type == EIGHTTAP_SHARP) {
-    xd->subpixel_predict         = vp9_eighttap_predict_sharp;
+    xd->subpixel_predict4x4      = vp9_eighttap_predict4x4_sharp;
     xd->subpixel_predict8x4      = vp9_eighttap_predict8x4_sharp;
     xd->subpixel_predict8x8      = vp9_eighttap_predict8x8_sharp;
     xd->subpixel_predict16x16    = vp9_eighttap_predict16x16_sharp;
-    xd->subpixel_predict_avg     = vp9_eighttap_predict_avg4x4_sharp;
+    xd->subpixel_predict_avg4x4  = vp9_eighttap_predict_avg4x4_sharp;
     xd->subpixel_predict_avg8x8  = vp9_eighttap_predict_avg8x8_sharp;
     xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16_sharp;
   } else {
-    xd->subpixel_predict         = vp9_bilinear_predict4x4;
+    xd->subpixel_predict4x4      = vp9_bilinear_predict4x4;
     xd->subpixel_predict8x4      = vp9_bilinear_predict8x4;
     xd->subpixel_predict8x8      = vp9_bilinear_predict8x8;
     xd->subpixel_predict16x16    = vp9_bilinear_predict16x16;
-    xd->subpixel_predict_avg     = vp9_bilinear_predict_avg4x4;
+    xd->subpixel_predict_avg4x4  = vp9_bilinear_predict_avg4x4;
     xd->subpixel_predict_avg8x8  = vp9_bilinear_predict_avg8x8;
     xd->subpixel_predict_avg16x16 = vp9_bilinear_predict_avg16x16;
   }
+#if CONFIG_ENABLE_6TAP
+  }
+#endif
 }
 
 void vp9_copy_mem16x16_c(uint8_t *src,
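(Note: the sharp `_avg16x16` assignment originally read `vp9_eighttap_predict_avg16x16_sharp_c`, naming the C implementation directly instead of the RTCD dispatch symbol used everywhere else; it is normalized above.) The net effect of the hunk is a simple mapping from INTERPOLATIONFILTERTYPE to one of three 8-tap coefficient banks. A hedged restatement (the array names are real and declared in vp9_filter.h; the table itself is illustrative and not part of the patch):

#include "vp9/common/vp9_filter.h"

/* Illustrative only: which 8-tap bank each filter type selects in
 * vp9_setup_interp_filters(). "8lp" is the new low-pass ("smooth") bank. */
static const int16_t (*const eighttap_banks[])[8] = {
  vp9_sub_pel_filters_8,    /* EIGHTTAP (also the SWITCHABLE default) */
  vp9_sub_pel_filters_8lp,  /* EIGHTTAP_SMOOTH                        */
  vp9_sub_pel_filters_8s,   /* EIGHTTAP_SHARP                         */
};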
@@ -313,132 +327,6 @@ static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
   }
 }
 
-
-/*encoder only*/
-#if CONFIG_PRED_FILTER
-
-// Select the thresholded or non-thresholded filter
-#define USE_THRESH_FILTER 0
-
-#define PRED_FILT_LEN 5
-
-static const int filt_shift = 4;
-static const int pred_filter[PRED_FILT_LEN] = {1, 2, 10, 2, 1};
-// Alternative filter {1, 1, 4, 1, 1}
-
-#if !USE_THRESH_FILTER
-void filter_mb(uint8_t *src, int src_stride,
-               uint8_t *dst, int dst_stride,
-               int width, int height) {
-  int i, j, k;
-  unsigned int temp[32 * 32];
-  unsigned int *pTmp = temp;
-  uint8_t *pSrc = src - (1 + src_stride) * (PRED_FILT_LEN / 2);
-
-  // Horizontal
-  for (i = 0; i < height + PRED_FILT_LEN - 1; i++) {
-    for (j = 0; j < width; j++) {
-      int sum = 0;
-      for (k = 0; k < PRED_FILT_LEN; k++)
-        sum += pSrc[j + k] * pred_filter[k];
-      pTmp[j] = sum;
-    }
-
-    pSrc += src_stride;
-    pTmp += width;
-  }
-
-  // Vertical
-  pTmp = temp;
-  for (i = 0; i < width; i++) {
-    uint8_t *pDst = dst + i;
-    for (j = 0; j < height; j++) {
-      int sum = 0;
-      for (k = 0; k < PRED_FILT_LEN; k++)
-        sum += pTmp[(j + k) * width] * pred_filter[k];
-      // Round
-      pDst[j * dst_stride] = (sum + ((1 << (filt_shift << 1)) >> 1)) >>
-                             (filt_shift << 1);
-    }
-    ++pTmp;
-  }
-}
-#else
-// Based on vp9_post_proc_down_and_across_c (vp9_postproc.c)
-void filter_mb(uint8_t *src, int src_stride,
-               uint8_t *dst, int dst_stride,
-               int width, int height) {
-  uint8_t *pSrc, *pDst;
-  int row;
-  int col;
-  int i;
-  int v;
-  uint8_t d[8];
-
-  /* TODO flimit should be linked to the quantizer value */
-  int flimit = 7;
-
-  for (row = 0; row < height; row++) {
-    /* post_proc_down for one row */
-    pSrc = src;
-    pDst = dst;
-
-    for (col = 0; col < width; col++) {
-      int kernel = (1 << (filt_shift - 1));
-      int v = pSrc[col];
-
-      for (i = -2; i <= 2; i++) {
-        if (abs(v - pSrc[col + i * src_stride]) > flimit)
-          goto down_skip_convolve;
-
-        kernel += pred_filter[2 + i] * pSrc[col + i * src_stride];
-      }
-
-      v = (kernel >> filt_shift);
-    down_skip_convolve:
-      pDst[col] = v;
-    }
-
-    /* now post_proc_across */
-    pSrc = dst;
-    pDst = dst;
-
-    for (i = 0; i < 8; i++)
-      d[i] = pSrc[i];
-
-    for (col = 0; col < width; col++) {
-      int kernel = (1 << (filt_shift - 1));
-      v = pSrc[col];
-
-      d[col & 7] = v;
-
-      for (i = -2; i <= 2; i++) {
-        if (abs(v - pSrc[col + i]) > flimit)
-          goto across_skip_convolve;
-
-        kernel += pred_filter[2 + i] * pSrc[col + i];
-      }
-
-      d[col & 7] = (kernel >> filt_shift);
-    across_skip_convolve:
-
-      if (col >= 2)
-        pDst[col - 2] = d[(col - 2) & 7];
-    }
-
-    /* handle the last two pixels */
-    pDst[col - 2] = d[(col - 2) & 7];
-    pDst[col - 1] = d[(col - 1) & 7];
-
-    /* next row */
-    src += src_stride;
-    dst += dst_stride;
-  }
-}
-#endif  // !USE_THRESH_FILTER
-
-#endif  // CONFIG_PRED_FILTER
-
 /*encoder only*/
 void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
   int i, j;
@@ -523,13 +411,13 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
       if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
         build_inter_predictors2b(xd, d0, 8);
       else {
-        vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict);
-        vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict);
+        vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict4x4);
+        vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict4x4);
       }
 
       if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-        vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg);
-        vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg);
+        vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg4x4);
+        vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg4x4);
       }
     }
   }
@@ -587,29 +475,6 @@ void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd,
   ptr = ptr_base + (ymv.as_mv.row >> 3) * pre_stride + (ymv.as_mv.col >> 3);
 
-#if CONFIG_PRED_FILTER
-  if (xd->mode_info_context->mbmi.pred_filter_enabled) {
-    if ((ymv.as_mv.row | ymv.as_mv.col) & 7) {
-      // Sub-pel filter needs extended input
-      int len = 15 + (VP9_INTERP_EXTEND << 1);
-      uint8_t Temp[32 * 32];  // Data required by sub-pel filter
-      uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
-
-      // Copy extended MB into Temp array, applying the spatial filter
-      filter_mb(ptr - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
-                Temp, len, len, len);
-
-      // Sub-pel interpolation
-      xd->subpixel_predict16x16(pTemp, len,
-                                (ymv.as_mv.col & 7) << 1,
-                                (ymv.as_mv.row & 7) << 1,
-                                dst_y, dst_ystride);
-    } else {
-      // Apply spatial filter to create the prediction directly
-      filter_mb(ptr, pre_stride, dst_y, dst_ystride, 16, 16);
-    }
-  } else
-#endif
   if ((ymv.as_mv.row | ymv.as_mv.col) & 7) {
     xd->subpixel_predict16x16(ptr, pre_stride,
                               (ymv.as_mv.col & 7) << 1,
@@ -658,37 +523,6 @@ void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
   uptr = xd->pre.u_buffer + offset;
   vptr = xd->pre.v_buffer + offset;
 
-#if CONFIG_PRED_FILTER
-  if (xd->mode_info_context->mbmi.pred_filter_enabled) {
-    int i;
-    uint8_t *pSrc = uptr;
-    uint8_t *pDst = dst_u;
-    int len = 7 + (VP9_INTERP_EXTEND << 1);
-    uint8_t Temp[32 * 32];  // Data required by the sub-pel filter
-    uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
-
-    // U & V
-    for (i = 0; i < 2; i++) {
-      if (_o16x16mv.as_int & 0x000f000f) {
-        // Copy extended MB into Temp array, applying the spatial filter
-        filter_mb(pSrc - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
-                  Temp, len, len, len);
-
-        // Sub-pel filter
-        xd->subpixel_predict8x8(pTemp, len,
-                                _o16x16mv.as_mv.col & 15,
-                                _o16x16mv.as_mv.row & 15,
-                                pDst, dst_uvstride);
-      } else {
-        filter_mb(pSrc, pre_stride, pDst, dst_uvstride, 8, 8);
-      }
-
-      // V
-      pSrc = vptr;
-      pDst = dst_v;
-    }
-  } else
-#endif
   if (_o16x16mv.as_int & 0x000f000f) {
     xd->subpixel_predict8x8(uptr, pre_stride, _o16x16mv.as_mv.col & 15,
                             _o16x16mv.as_mv.row & 15, dst_u, dst_uvstride);
@@ -849,9 +683,9 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
 /*
  * The following functions should be called after an initial
  * call to vp9_build_1st_inter16x16_predictors_mb() or _mby()/_mbuv().
- * It will run a second sixtap filter on a (different) ref
+ * It will run a second filter on a (different) ref
  * frame and average the result with the output of the
- * first sixtap filter. The second reference frame is stored
+ * first filter. The second reference frame is stored
  * in x->second_pre (the reference frame index is in
  * x->mode_info_context->mbmi.second_ref_frame). The second
  * motion vector is x->mode_info_context->mbmi.second_mv.
@@ -882,35 +716,11 @@ void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd,
 
   ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
 
-#if CONFIG_PRED_FILTER
-  if (xd->mode_info_context->mbmi.pred_filter_enabled) {
-    if ((mv_row | mv_col) & 7) {
-      // Sub-pel filter needs extended input
-      int len = 15 + (VP9_INTERP_EXTEND << 1);
-      uint8_t Temp[32 * 32];  // Data required by sub-pel filter
-      uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
-
-      // Copy extended MB into Temp array, applying the spatial filter
-      filter_mb(ptr - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
-                Temp, len, len, len);
-
-      // Sub-pel filter
-      xd->subpixel_predict_avg16x16(pTemp, len, (mv_col & 7) << 1,
-                                    (mv_row & 7) << 1, dst_y, dst_ystride);
-    } else {
-      // TODO Needs to AVERAGE with the dst_y
-      // For now, do not apply the prediction filter in these cases!
-      vp9_avg_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
-    }
-  } else
-#endif  // CONFIG_PRED_FILTER
-  {
-    if ((mv_row | mv_col) & 7) {
-      xd->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7) << 1,
-                                    (mv_row & 7) << 1, dst_y, dst_ystride);
-    } else {
-      vp9_avg_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
-    }
+  if ((mv_row | mv_col) & 7) {
+    xd->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7) << 1,
+                                  (mv_row & 7) << 1, dst_y, dst_ystride);
+  } else {
+    vp9_avg_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
   }
 }
 
@@ -950,37 +760,6 @@ void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
   uptr = xd->second_pre.u_buffer + offset;
   vptr = xd->second_pre.v_buffer + offset;
 
-#if CONFIG_PRED_FILTER
-  if (xd->mode_info_context->mbmi.pred_filter_enabled) {
-    int i;
-    int len = 7 + (VP9_INTERP_EXTEND << 1);
-    uint8_t Temp[32 * 32];  // Data required by sub-pel filter
-    uint8_t *pTemp = Temp + (VP9_INTERP_EXTEND - 1) * (len + 1);
-    uint8_t *pSrc = uptr;
-    uint8_t *pDst = dst_u;
-
-    // U & V
-    for (i = 0; i < 2; i++) {
-      if ((omv_row | omv_col) & 15) {
-        // Copy extended MB into Temp array, applying the spatial filter
-        filter_mb(pSrc - (VP9_INTERP_EXTEND - 1) * (pre_stride + 1), pre_stride,
-                  Temp, len, len, len);
-
-        // Sub-pel filter
-        xd->subpixel_predict_avg8x8(pTemp, len, omv_col & 15,
-                                    omv_row & 15, pDst, dst_uvstride);
-      } else {
-        // TODO Needs to AVERAGE with the dst_[u|v]
-        // For now, do not apply the prediction filter here!
-        vp9_avg_mem8x8(pSrc, pre_stride, pDst, dst_uvstride);
-      }
-
-      // V
-      pSrc = vptr;
-      pDst = dst_v;
-    }
-  } else
-#endif  // CONFIG_PRED_FILTER
   if ((omv_row | omv_col) & 15) {
     xd->subpixel_predict_avg8x8(uptr, pre_stride, omv_col & 15,
                                 omv_row & 15, dst_u, dst_uvstride);
@@ -1058,13 +837,13 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
       if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
         build_inter_predictors2b(xd, d0, 16);
       else {
-        vp9_build_inter_predictors_b(d0, 16, xd->subpixel_predict);
-        vp9_build_inter_predictors_b(d1, 16, xd->subpixel_predict);
+        vp9_build_inter_predictors_b(d0, 16, xd->subpixel_predict4x4);
+        vp9_build_inter_predictors_b(d1, 16, xd->subpixel_predict4x4);
       }
 
       if (mbmi->second_ref_frame > 0) {
-        vp9_build_2nd_inter_predictors_b(d0, 16, xd->subpixel_predict_avg);
-        vp9_build_2nd_inter_predictors_b(d1, 16, xd->subpixel_predict_avg);
+        vp9_build_2nd_inter_predictors_b(d0, 16, xd->subpixel_predict_avg4x4);
+        vp9_build_2nd_inter_predictors_b(d1, 16, xd->subpixel_predict_avg4x4);
       }
     }
   }
@@ -1076,13 +855,13 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
       if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
         build_inter_predictors2b(xd, d0, 8);
       else {
-        vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict);
-        vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict);
+        vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict4x4);
+        vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict4x4);
       }
 
      if (mbmi->second_ref_frame > 0) {
-        vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg);
-        vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg);
+        vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg4x4);
+        vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg4x4);
       }
     }
   }
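A recurring idiom in the hunks above is the test `(mv_row | mv_col) & 7` (or `& 15` for the half-resolution chroma planes): motion vectors are in 1/8-pel units, so the low bits of each component are the fractional offset, and OR-ing the components tests "any sub-pel part" in one step. A minimal illustrative restatement (not part of the patch; positive vectors shown for simplicity):

/* Illustrative only: nonzero iff either MV component has a fractional
 * (1/8-pel) part, in which case the sub-pel interpolator must run;
 * otherwise a plain copy/average of whole pixels suffices. */
static int needs_subpel(int mv_row, int mv_col) {
  return (mv_row | mv_col) & 7;
}
/* e.g. row = 16 (2 full pels), col = 24 (3 full pels) -> 0: copy path;
 *      row = 16, col = 25 (3 pels + 1/8 pel)          -> nonzero.    */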
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index d35854580..329c0929e 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -310,8 +310,8 @@ specialize vp9_eighttap_predict_avg4x4
 prototype void vp9_eighttap_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
 specialize vp9_eighttap_predict8x4
 
-prototype void vp9_eighttap_predict "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict
+prototype void vp9_eighttap_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict4x4
 
 prototype void vp9_eighttap_predict16x16_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
 specialize vp9_eighttap_predict16x16_sharp
@@ -331,8 +331,29 @@ specialize vp9_eighttap_predict_avg4x4_sharp
 prototype void vp9_eighttap_predict8x4_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
 specialize vp9_eighttap_predict8x4_sharp
 
-prototype void vp9_eighttap_predict_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_sharp
+prototype void vp9_eighttap_predict4x4_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict4x4_sharp
+
+prototype void vp9_eighttap_predict16x16_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict16x16_smooth
+
+prototype void vp9_eighttap_predict8x8_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict8x8_smooth
+
+prototype void vp9_eighttap_predict_avg16x16_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg16x16_smooth
+
+prototype void vp9_eighttap_predict_avg8x8_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg8x8_smooth
+
+prototype void vp9_eighttap_predict_avg4x4_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict_avg4x4_smooth
+
+prototype void vp9_eighttap_predict8x4_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict8x4_smooth
+
+prototype void vp9_eighttap_predict4x4_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_eighttap_predict4x4_smooth
 
 prototype void vp9_sixtap_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
 specialize vp9_sixtap_predict16x16
@@ -349,11 +370,11 @@ specialize vp9_sixtap_predict_avg8x8
 prototype void vp9_sixtap_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
 specialize vp9_sixtap_predict8x4
 
-prototype void vp9_sixtap_predict "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict
+prototype void vp9_sixtap_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict4x4
 
-prototype void vp9_sixtap_predict_avg "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict_avg
+prototype void vp9_sixtap_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
+specialize vp9_sixtap_predict_avg4x4
 
 prototype void vp9_bilinear_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
 specialize vp9_bilinear_predict16x16 sse2
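For readers unfamiliar with the RTCD scripts: each `prototype`/`specialize` pair above is expanded by rtcd.sh into the generated vp9_rtcd.h at build time. When no SIMD variants are listed after `specialize`, the dispatch collapses to the plain C implementation. A rough sketch of the generated output for one of the new smooth predictors (the exact emitted text is an assumption about the generator, shown for orientation only):

/* Hypothetical excerpt of the generated vp9_rtcd.h for one prototype.
 * With no architecture suffixes on the specialize line, the dispatch
 * symbol is simply #define'd to the C version. */
void vp9_eighttap_predict4x4_smooth_c(uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset, int yoffset,
                                      uint8_t *dst_ptr, int dst_pitch);
#define vp9_eighttap_predict4x4_smooth vp9_eighttap_predict4x4_smooth_c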
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 78108a55d..4b633df45 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -550,12 +550,6 @@ static void mb_mode_mv_init(VP9D_COMP *pbi, vp9_reader *bc) {
     if (!cm->kf_ymode_probs_update)
       cm->kf_ymode_probs_index = vp9_read_literal(bc, 3);
   } else {
-#if CONFIG_PRED_FILTER
-    cm->pred_filter_mode = (vp9_prob)vp9_read_literal(bc, 2);
-
-    if (cm->pred_filter_mode == 2)
-      cm->prob_pred_filter_off = (vp9_prob)vp9_read_literal(bc, 8);
-#endif
     if (cm->mcomp_filter_type == SWITCHABLE)
       read_switchable_interp_probs(pbi, bc);
 #if CONFIG_COMP_INTERINTRA_PRED
@@ -825,17 +819,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 #endif
     }
 
-
-#if CONFIG_PRED_FILTER
-    if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV) {
-      // Is the prediction filter enabled
-      if (cm->pred_filter_mode == 2)
-        mbmi->pred_filter_enabled =
-          vp9_read(bc, cm->prob_pred_filter_off);
-      else
-        mbmi->pred_filter_enabled = cm->pred_filter_mode;
-    }
-#endif
 
     if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) {
       if (cm->mcomp_filter_type == SWITCHABLE) {
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index e03651493..a7dac74e3 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -831,17 +831,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
         vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
       }
 
-#if CONFIG_PRED_FILTER
-      // Is the prediction filter enabled
-      if (mode >= NEARESTMV && mode < SPLITMV) {
-        if (cpi->common.pred_filter_mode == 2)
-          vp9_write(bc, mi->pred_filter_enabled,
-                    pc->prob_pred_filter_off);
-        else
-          assert(mi->pred_filter_enabled ==
-                 cpi->common.pred_filter_mode);
-      }
-#endif
       if (mode >= NEARESTMV && mode <= SPLITMV) {
         if (cpi->common.mcomp_filter_type == SWITCHABLE) {
           write_token(bc, vp9_switchable_interp_tree,
@@ -2023,15 +2012,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
   active_section = 1;
 #endif
 
-#if CONFIG_PRED_FILTER
-  // Write the prediction filter mode used for this frame
-  vp9_write_literal(&header_bc, pc->pred_filter_mode, 2);
-
-  // Write prediction filter on/off probability if signaling at MB level
-  if (pc->pred_filter_mode == 2)
-    vp9_write_literal(&header_bc, pc->prob_pred_filter_off, 8);
-
-#endif
   if (pc->mcomp_filter_type == SWITCHABLE)
     update_switchable_interp_probs(cpi, &header_bc);
 
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ad27c6f39..d8478a15b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1300,16 +1300,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   cpi->skip_true_count[0] = cpi->skip_true_count[1] = cpi->skip_true_count[2] = 0;
   cpi->skip_false_count[0] = cpi->skip_false_count[1] = cpi->skip_false_count[2] = 0;
 
-#if CONFIG_PRED_FILTER
-  if (cm->current_video_frame == 0) {
-    // Initially assume that we'll signal the prediction filter
-    // state at the frame level and that it is off.
-    cpi->common.pred_filter_mode = 0;
-    cpi->common.prob_pred_filter_off = 128;
-  }
-  cpi->pred_filter_on_count = 0;
-  cpi->pred_filter_off_count = 0;
-#endif
   vp9_zero(cpi->switchable_interp_count);
   vp9_zero(cpi->best_switchable_interp_count);
 
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 216a7fb95..a6b41fffb 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -889,11 +889,6 @@ void vp9_encode_inter16x16y(MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   BLOCK *b = &x->block[0];
 
-#if CONFIG_PRED_FILTER
-  // Disable the prediction filter for firstpass
-  xd->mode_info_context->mbmi.pred_filter_enabled = 0;
-#endif
-
   vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
 
   vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 084d20ed9..b8bf7de0d 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -18,9 +18,6 @@ typedef struct {
   MB_PREDICTION_MODE mode;
   MV_REFERENCE_FRAME ref_frame;
   MV_REFERENCE_FRAME second_ref_frame;
-#if CONFIG_PRED_FILTER
-  int pred_filter_flag;
-#endif
 } MODE_DEFINITION;
 
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index c319e07c0..0ff60c8b0 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -71,11 +71,6 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
                                               &distortion, &sse);
   }
 
-#if CONFIG_PRED_FILTER
-  // Disable the prediction filter
-  xd->mode_info_context->mbmi.pred_filter_enabled = 0;
-#endif
-
   vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
   vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
   best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index a56fcc0c0..f08ea6feb 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -49,7 +49,12 @@ extern void print_tree_update_probs();
 
 static void set_default_lf_deltas(VP9_COMP *cpi);
 
-#define DEFAULT_INTERP_FILTER EIGHTTAP  /* SWITCHABLE for better performance */
+#if CONFIG_PRED_FILTER
+#define DEFAULT_INTERP_FILTER SWITCHABLE
+#else
+#define DEFAULT_INTERP_FILTER EIGHTTAP
+#endif
+
 #define SEARCH_BEST_FILTER 0      /* to search exhaustively for best filter */
 #define RESET_FOREACH_FILTER 0    /* whether to reset the encoder state
@@ -632,47 +637,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   switch (Mode) {
     case 0:  // best quality mode
-#if CONFIG_PRED_FILTER
-      sf->thresh_mult[THR_ZEROMV        ] = 0;
-      sf->thresh_mult[THR_ZEROMV_FILT   ] = 0;
-      sf->thresh_mult[THR_ZEROG         ] = 0;
-      sf->thresh_mult[THR_ZEROG_FILT    ] = 0;
-      sf->thresh_mult[THR_ZEROA         ] = 0;
-      sf->thresh_mult[THR_ZEROA_FILT    ] = 0;
-      sf->thresh_mult[THR_NEARESTMV     ] = 0;
-      sf->thresh_mult[THR_NEARESTMV_FILT] = 0;
-      sf->thresh_mult[THR_NEARESTG      ] = 0;
-      sf->thresh_mult[THR_NEARESTG_FILT ] = 0;
-      sf->thresh_mult[THR_NEARESTA      ] = 0;
-      sf->thresh_mult[THR_NEARESTA_FILT ] = 0;
-      sf->thresh_mult[THR_NEARMV        ] = 0;
-      sf->thresh_mult[THR_NEARMV_FILT   ] = 0;
-      sf->thresh_mult[THR_NEARG         ] = 0;
-      sf->thresh_mult[THR_NEARG_FILT    ] = 0;
-      sf->thresh_mult[THR_NEARA         ] = 0;
-      sf->thresh_mult[THR_NEARA_FILT    ] = 0;
-
-      sf->thresh_mult[THR_DC       ] = 0;
-
-      sf->thresh_mult[THR_V_PRED   ] = 1000;
-      sf->thresh_mult[THR_H_PRED   ] = 1000;
-      sf->thresh_mult[THR_D45_PRED ] = 1000;
-      sf->thresh_mult[THR_D135_PRED] = 1000;
-      sf->thresh_mult[THR_D117_PRED] = 1000;
-      sf->thresh_mult[THR_D153_PRED] = 1000;
-      sf->thresh_mult[THR_D27_PRED ] = 1000;
-      sf->thresh_mult[THR_D63_PRED ] = 1000;
-      sf->thresh_mult[THR_B_PRED   ] = 2000;
-      sf->thresh_mult[THR_I8X8_PRED] = 2000;
-      sf->thresh_mult[THR_TM       ] = 1000;
-
-      sf->thresh_mult[THR_NEWMV      ] = 1000;
-      sf->thresh_mult[THR_NEWG       ] = 1000;
-      sf->thresh_mult[THR_NEWA       ] = 1000;
-      sf->thresh_mult[THR_NEWMV_FILT ] = 1000;
-      sf->thresh_mult[THR_NEWG_FILT  ] = 1000;
-      sf->thresh_mult[THR_NEWA_FILT  ] = 1000;
-#else
       sf->thresh_mult[THR_ZEROMV   ] = 0;
       sf->thresh_mult[THR_ZEROG    ] = 0;
       sf->thresh_mult[THR_ZEROA    ] = 0;
@@ -700,7 +664,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       sf->thresh_mult[THR_NEWMV    ] = 1000;
       sf->thresh_mult[THR_NEWG     ] = 1000;
       sf->thresh_mult[THR_NEWA     ] = 1000;
-#endif
+
       sf->thresh_mult[THR_SPLITMV  ] = 2500;
       sf->thresh_mult[THR_SPLITG   ] = 5000;
      sf->thresh_mult[THR_SPLITA   ] = 5000;
@@ -743,66 +707,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       sf->search_best_filter = SEARCH_BEST_FILTER;
       break;
     case 1:
-#if CONFIG_PRED_FILTER
-      sf->thresh_mult[THR_NEARESTMV] = 0;
-      sf->thresh_mult[THR_NEARESTMV_FILT] = 0;
-      sf->thresh_mult[THR_ZEROMV   ] = 0;
-      sf->thresh_mult[THR_ZEROMV_FILT  ] = 0;
-      sf->thresh_mult[THR_DC       ] = 0;
-      sf->thresh_mult[THR_NEARMV   ] = 0;
-      sf->thresh_mult[THR_NEARMV_FILT  ] = 0;
-      sf->thresh_mult[THR_V_PRED   ] = 1000;
-      sf->thresh_mult[THR_H_PRED   ] = 1000;
-      sf->thresh_mult[THR_D45_PRED ] = 1000;
-      sf->thresh_mult[THR_D135_PRED] = 1000;
-      sf->thresh_mult[THR_D117_PRED] = 1000;
-      sf->thresh_mult[THR_D153_PRED] = 1000;
-      sf->thresh_mult[THR_D27_PRED ] = 1000;
-      sf->thresh_mult[THR_D63_PRED ] = 1000;
-      sf->thresh_mult[THR_B_PRED   ] = 2500;
-      sf->thresh_mult[THR_I8X8_PRED] = 2500;
-      sf->thresh_mult[THR_TM       ] = 1000;
-
-      sf->thresh_mult[THR_NEARESTG ] = 1000;
-      sf->thresh_mult[THR_NEARESTG_FILT ] = 1000;
-      sf->thresh_mult[THR_NEARESTA ] = 1000;
-      sf->thresh_mult[THR_NEARESTA_FILT ] = 1000;
-
-      sf->thresh_mult[THR_ZEROG    ] = 1000;
-      sf->thresh_mult[THR_ZEROA    ] = 1000;
-      sf->thresh_mult[THR_NEARG    ] = 1000;
-      sf->thresh_mult[THR_NEARA    ] = 1000;
-      sf->thresh_mult[THR_ZEROG_FILT   ] = 1000;
-      sf->thresh_mult[THR_ZEROA_FILT   ] = 1000;
-      sf->thresh_mult[THR_NEARG_FILT   ] = 1000;
-      sf->thresh_mult[THR_NEARA_FILT   ] = 1000;
-
-      sf->thresh_mult[THR_ZEROMV   ] = 0;
-      sf->thresh_mult[THR_ZEROG    ] = 0;
-      sf->thresh_mult[THR_ZEROA    ] = 0;
-      sf->thresh_mult[THR_NEARESTMV] = 0;
-      sf->thresh_mult[THR_NEARESTG ] = 0;
-      sf->thresh_mult[THR_NEARESTA ] = 0;
-      sf->thresh_mult[THR_NEARMV   ] = 0;
-      sf->thresh_mult[THR_NEARG    ] = 0;
-      sf->thresh_mult[THR_NEARA    ] = 0;
-      sf->thresh_mult[THR_ZEROMV_FILT   ] = 0;
-      sf->thresh_mult[THR_ZEROG_FILT    ] = 0;
-      sf->thresh_mult[THR_ZEROA_FILT    ] = 0;
-      sf->thresh_mult[THR_NEARESTMV_FILT] = 0;
-      sf->thresh_mult[THR_NEARESTG_FILT ] = 0;
-      sf->thresh_mult[THR_NEARESTA_FILT ] = 0;
-      sf->thresh_mult[THR_NEARMV_FILT   ] = 0;
-      sf->thresh_mult[THR_NEARG_FILT    ] = 0;
-      sf->thresh_mult[THR_NEARA_FILT    ] = 0;
-
-      sf->thresh_mult[THR_NEWMV    ] = 1000;
-      sf->thresh_mult[THR_NEWG     ] = 1000;
-      sf->thresh_mult[THR_NEWA     ] = 1000;
-      sf->thresh_mult[THR_NEWMV_FILT   ] = 1000;
-      sf->thresh_mult[THR_NEWG_FILT    ] = 1000;
-      sf->thresh_mult[THR_NEWA_FILT    ] = 1000;
-#else
       sf->thresh_mult[THR_NEARESTMV] = 0;
       sf->thresh_mult[THR_ZEROMV   ] = 0;
       sf->thresh_mult[THR_DC       ] = 0;
@@ -840,7 +744,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
       sf->thresh_mult[THR_NEWMV    ] = 1000;
       sf->thresh_mult[THR_NEWG     ] = 1000;
       sf->thresh_mult[THR_NEWA     ] = 1000;
-#endif
+
       sf->thresh_mult[THR_SPLITMV  ] = 1700;
       sf->thresh_mult[THR_SPLITG   ] = 4500;
       sf->thresh_mult[THR_SPLITA   ] = 4500;
@@ -916,9 +820,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
       if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
         sf->thresh_mult[THR_NEWMV    ] = 2000;
-#if CONFIG_PRED_FILTER
-        sf->thresh_mult[THR_NEWMV_FILT    ] = 2000;
-#endif
         sf->thresh_mult[THR_SPLITMV  ] = 10000;
         sf->thresh_mult[THR_COMP_SPLITLG  ] = 20000;
       }
@@ -928,12 +829,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->thresh_mult[THR_NEARESTG ] = 1500;
         sf->thresh_mult[THR_ZEROG    ] = 1500;
         sf->thresh_mult[THR_NEARG    ] = 1500;
         sf->thresh_mult[THR_NEWG     ] = 2000;
-#if CONFIG_PRED_FILTER
-        sf->thresh_mult[THR_NEARESTG_FILT ] = 1500;
-        sf->thresh_mult[THR_ZEROG_FILT    ] = 1500;
-        sf->thresh_mult[THR_NEARG_FILT    ] = 1500;
-        sf->thresh_mult[THR_NEWG_FILT     ] = 2000;
-#endif
         sf->thresh_mult[THR_SPLITG   ] = 20000;
         sf->thresh_mult[THR_COMP_SPLITGA  ] = 20000;
       }
@@ -943,12 +838,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->thresh_mult[THR_ZEROA    ] = 1500;
         sf->thresh_mult[THR_NEARA    ] = 1500;
         sf->thresh_mult[THR_NEWA     ] = 2000;
-#if CONFIG_PRED_FILTER
-        sf->thresh_mult[THR_NEARESTA_FILT ] = 1500;
-        sf->thresh_mult[THR_ZEROA_FILT    ] = 1500;
-        sf->thresh_mult[THR_NEARA_FILT    ] = 1500;
-        sf->thresh_mult[THR_NEWA_FILT     ] = 2000;
-#endif
        sf->thresh_mult[THR_SPLITA   ] = 20000;
         sf->thresh_mult[THR_COMP_SPLITLA  ] = 10000;
       }
@@ -1005,9 +894,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
       if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
         sf->thresh_mult[THR_NEWMV    ] = 2000;
-#if CONFIG_PRED_FILTER
-        sf->thresh_mult[THR_NEWMV_FILT    ] = 2000;
-#endif
         sf->thresh_mult[THR_SPLITMV  ] = 25000;
         sf->thresh_mult[THR_COMP_SPLITLG  ] = 50000;
       }
@@ -1017,12 +903,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->thresh_mult[THR_ZEROG    ] = 2000;
         sf->thresh_mult[THR_NEARG    ] = 2000;
         sf->thresh_mult[THR_NEWG     ] = 2500;
-#if CONFIG_PRED_FILTER
-        sf->thresh_mult[THR_NEARESTG_FILT ] = 2000;
-        sf->thresh_mult[THR_ZEROG_FILT    ] = 2000;
-        sf->thresh_mult[THR_NEARG_FILT    ] = 2000;
-        sf->thresh_mult[THR_NEWG_FILT     ] = 2500;
-#endif
         sf->thresh_mult[THR_SPLITG   ] = 50000;
         sf->thresh_mult[THR_COMP_SPLITGA  ] = 50000;
       }
@@ -1032,12 +912,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->thresh_mult[THR_ZEROA    ] = 2000;
         sf->thresh_mult[THR_NEARA    ] = 2000;
         sf->thresh_mult[THR_NEWA     ] = 2500;
-#if CONFIG_PRED_FILTER
-        sf->thresh_mult[THR_NEARESTA_FILT ] = 2000;
-        sf->thresh_mult[THR_ZEROA_FILT    ] = 2000;
-        sf->thresh_mult[THR_NEARA_FILT    ] = 2000;
-        sf->thresh_mult[THR_NEWA_FILT     ] = 2500;
-#endif
         sf->thresh_mult[THR_SPLITA   ] = 50000;
         sf->thresh_mult[THR_COMP_SPLITLA  ] = 25000;
       }
@@ -1088,12 +962,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
     sf->thresh_mult[THR_ZEROMV   ] = INT_MAX;
     sf->thresh_mult[THR_NEARMV   ] = INT_MAX;
-#if CONFIG_PRED_FILTER
-    sf->thresh_mult[THR_NEWMV_FILT    ] = INT_MAX;
-    sf->thresh_mult[THR_NEARESTMV_FILT] = INT_MAX;
-    sf->thresh_mult[THR_ZEROMV_FILT   ] = INT_MAX;
-    sf->thresh_mult[THR_NEARMV_FILT   ] = INT_MAX;
-#endif
     sf->thresh_mult[THR_SPLITMV  ] = INT_MAX;
   }
 
@@ -1102,12 +970,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     sf->thresh_mult[THR_ZEROG    ] = INT_MAX;
     sf->thresh_mult[THR_NEARG    ] = INT_MAX;
     sf->thresh_mult[THR_NEWG     ] = INT_MAX;
-#if CONFIG_PRED_FILTER
-    sf->thresh_mult[THR_NEARESTG_FILT ] = INT_MAX;
-    sf->thresh_mult[THR_ZEROG_FILT    ] = INT_MAX;
-    sf->thresh_mult[THR_NEARG_FILT    ] = INT_MAX;
-    sf->thresh_mult[THR_NEWG_FILT     ] = INT_MAX;
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
     sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG   ] = INT_MAX;
     sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
@@ -1122,12 +984,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
     sf->thresh_mult[THR_NEARA    ] = INT_MAX;
     sf->thresh_mult[THR_NEWA     ] = INT_MAX;
-#if CONFIG_PRED_FILTER
-    sf->thresh_mult[THR_NEARESTA_FILT ] = INT_MAX;
-    sf->thresh_mult[THR_ZEROA_FILT    ] = INT_MAX;
-    sf->thresh_mult[THR_NEARA_FILT    ] = INT_MAX;
-    sf->thresh_mult[THR_NEWA_FILT     ] = INT_MAX;
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
     sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA   ] = INT_MAX;
     sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
@@ -2823,7 +2679,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
 
 void select_interp_filter_type(VP9_COMP *cpi) {
   int i;
-  int high_filter_index;
+  int high_filter_index = 0;
   unsigned int thresh;
   unsigned int high_count = 0;
   unsigned int count_sum = 0;
@@ -2860,38 +2716,6 @@ void select_interp_filter_type(VP9_COMP *cpi) {
     }
   }
 }
-#if CONFIG_PRED_FILTER
-void select_pred_filter_mode(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
-
-  int prob_pred_filter_off = cm->prob_pred_filter_off;
-
-  // Force filter on/off if probability is extreme
-  if (prob_pred_filter_off >= 255 * 0.95)
-    cm->pred_filter_mode = 0;  // Off at the frame level
-  else if (prob_pred_filter_off <= 255 * 0.05)
-    cm->pred_filter_mode = 1;  // On at the frame level
-  else
-    cm->pred_filter_mode = 2;  // Selectable at the MB level
-}
-
-void update_pred_filt_prob(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
-
-  // Based on the selection in the previous frame determine what mode
-  // to use for the current frame and work out the signaling probability
-  cm->prob_pred_filter_off = get_binary_prob(cpi->pred_filter_off_count,
-                                             cpi->pred_filter_on_count);
-  /*
-  {
-    FILE *fp = fopen("filt_use.txt", "a");
-    fprintf (fp, "%d %d prob=%d\n", cpi->pred_filter_off_count,
-             cpi->pred_filter_on_count, cm->prob_pred_filter_off);
-    fclose(fp);
-  }
-  */
-}
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
 static void select_interintra_mode(VP9_COMP *cpi) {
   static const double threshold = 0.01;
@@ -2949,7 +2773,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
   /* list of filters to search over */
   int mcomp_filters_to_search[] = {
-    EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE
+#if CONFIG_ENABLE_6TAP
+    EIGHTTAP, EIGHTTAP_SHARP, SIXTAP, SWITCHABLE
+#else
+    EIGHTTAP, EIGHTTAP_SHARP, EIGHTTAP_SMOOTH, SWITCHABLE
+#endif
   };
   int mcomp_filters = sizeof(mcomp_filters_to_search) /
       sizeof(*mcomp_filters_to_search);
@@ -3323,13 +3151,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
   vp9_clear_system_state();  // __asm emms;
 
-#if CONFIG_PRED_FILTER
-  // Update prediction filter on/off probability based on
-  // selection made for the current frame
-  if (cm->frame_type != KEY_FRAME)
-    update_pred_filt_prob(cpi);
-#endif
-
   // Dummy pack of the bitstream using up to date stats to get an
   // accurate estimate of output frame size to determine if we need
   // to recode.
@@ -3634,13 +3455,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     update_reference_segmentation_map(cpi);
   }
 
-#if CONFIG_PRED_FILTER
-  // Select the prediction filtering mode to use for the
-  // next frame based on the current frame selections
-  if (cm->frame_type != KEY_FRAME)
-    select_pred_filter_mode(cpi);
-#endif
-
   update_reference_frames(cm);
   vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4);
   vp9_copy(cpi->common.fc.hybrid_coef_counts_4x4,
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index dbe6e2bd6..d917c0e7f 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -41,19 +41,11 @@
 #define AF_THRESH2 100
 #define ARF_DECAY_THRESH 12
 
-#if CONFIG_PRED_FILTER
-#if CONFIG_COMP_INTERINTRA_PRED
-#define MAX_MODES 66
-#else
-#define MAX_MODES 54
-#endif
-#else  // CONFIG_PRED_FILTER
 #if CONFIG_COMP_INTERINTRA_PRED
 #define MAX_MODES 54
 #else
 #define MAX_MODES 42
 #endif
-#endif  // CONFIG_PRED_FILTER
 
 #define MIN_THRESHMULT 32
 #define MAX_THRESHMULT 512
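The new MAX_MODES values follow directly from counting the THR_MODES list kept below: 42 base entries, plus 4 MV modes (ZERO/NEAREST/NEAR/NEW) times 3 reference frames (L/G/A) of compound inter-intra modes, giving 54; the removed pred-filter variant added 12 more "_FILT" duplicates, which is where the old 54/66 came from. A compile-time restatement of that arithmetic (illustrative only, not part of the patch; macro names hypothetical):

/* Hypothetical compile-time check mirroring the MAX_MODES arithmetic. */
#define VP9_BASE_MODES 42              /* THR_MODES entries w/o interintra */
#define VP9_INTERINTRA_MODES (4 * 3)   /* ZERO/NEAREST/NEAR/NEW x L/G/A    */
typedef char vp9_max_modes_check[
    (VP9_BASE_MODES + VP9_INTERINTRA_MODES == 54) ? 1 : -1];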
@@ -173,94 +165,6 @@ typedef struct {
   MBGRAPH_MB_STATS *mb_stats;
 } MBGRAPH_FRAME_STATS;
 
-#if CONFIG_PRED_FILTER
-typedef enum {
-  THR_ZEROMV,
-  THR_ZEROMV_FILT,
-  THR_DC,
-
-  THR_NEARESTMV,
-  THR_NEARESTMV_FILT,
-  THR_NEARMV,
-  THR_NEARMV_FILT,
-
-  THR_ZEROG,
-  THR_ZEROG_FILT,
-  THR_NEARESTG,
-  THR_NEARESTG_FILT,
-
-  THR_ZEROA,
-  THR_ZEROA_FILT,
-  THR_NEARESTA,
-  THR_NEARESTA_FILT,
-
-  THR_NEARG,
-  THR_NEARG_FILT,
-  THR_NEARA,
-  THR_NEARA_FILT,
-
-  THR_V_PRED,
-  THR_H_PRED,
-  THR_D45_PRED,
-  THR_D135_PRED,
-  THR_D117_PRED,
-  THR_D153_PRED,
-  THR_D27_PRED,
-  THR_D63_PRED,
-  THR_TM,
-
-  THR_NEWMV,
-  THR_NEWMV_FILT,
-  THR_NEWG,
-  THR_NEWG_FILT,
-  THR_NEWA,
-  THR_NEWA_FILT,
-
-  THR_SPLITMV,
-  THR_SPLITG,
-  THR_SPLITA,
-
-  THR_B_PRED,
-  THR_I8X8_PRED,
-
-  THR_COMP_ZEROLG,
-  THR_COMP_NEARESTLG,
-  THR_COMP_NEARLG,
-
-  THR_COMP_ZEROLA,
-  THR_COMP_NEARESTLA,
-  THR_COMP_NEARLA,
-
-  THR_COMP_ZEROGA,
-  THR_COMP_NEARESTGA,
-  THR_COMP_NEARGA,
-
-  THR_COMP_NEWLG,
-  THR_COMP_NEWLA,
-  THR_COMP_NEWGA,
-
-  THR_COMP_SPLITLG,
-  THR_COMP_SPLITLA,
-  THR_COMP_SPLITGA,
-#if CONFIG_COMP_INTERINTRA_PRED
-  THR_COMP_INTERINTRA_ZEROL,
-  THR_COMP_INTERINTRA_NEARESTL,
-  THR_COMP_INTERINTRA_NEARL,
-  THR_COMP_INTERINTRA_NEWL,
-
-  THR_COMP_INTERINTRA_ZEROG,
-  THR_COMP_INTERINTRA_NEARESTG,
-  THR_COMP_INTERINTRA_NEARG,
-  THR_COMP_INTERINTRA_NEWG,
-
-  THR_COMP_INTERINTRA_ZEROA,
-  THR_COMP_INTERINTRA_NEARESTA,
-  THR_COMP_INTERINTRA_NEARA,
-  THR_COMP_INTERINTRA_NEWA,
-#endif
-}
-THR_MODES;
-#else
 typedef enum {
   THR_ZEROMV,
   THR_DC,
@@ -335,7 +239,6 @@ typedef enum {
 #endif
 }
 THR_MODES;
-#endif
 
 typedef enum {
   DIAMOND = 0,
@@ -795,10 +698,6 @@ typedef struct VP9_COMP {
 
   int dummy_packing;    /* flag to indicate if packing is dummy */
 
-#if CONFIG_PRED_FILTER
-  int pred_filter_on_count;
-  int pred_filter_off_count;
-#endif
   unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
                                       [VP9_SWITCHABLE_FILTERS];
   unsigned int best_switchable_interp_count[VP9_SWITCHABLE_FILTERS];
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 956d8f90c..7c8f41d84 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -70,97 +70,6 @@ static const int auto_speed_thresh[17] = {
   105
 };
 
-#if CONFIG_PRED_FILTER
-const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
-  {ZEROMV, LAST_FRAME, NONE, 0},
-  {ZEROMV, LAST_FRAME, NONE, 1},
-  {DC_PRED, INTRA_FRAME, NONE, 0},
-
-  {NEARESTMV, LAST_FRAME, NONE, 0},
-  {NEARESTMV, LAST_FRAME, NONE, 1},
-  {NEARMV, LAST_FRAME, NONE, 0},
-  {NEARMV, LAST_FRAME, NONE, 1},
-
-  {ZEROMV, GOLDEN_FRAME, NONE, 0},
-  {ZEROMV, GOLDEN_FRAME, NONE, 1},
-  {NEARESTMV, GOLDEN_FRAME, NONE, 0},
-  {NEARESTMV, GOLDEN_FRAME, NONE, 1},
-
-  {ZEROMV, ALTREF_FRAME, NONE, 0},
-  {ZEROMV, ALTREF_FRAME, NONE, 1},
-  {NEARESTMV, ALTREF_FRAME, NONE, 0},
-  {NEARESTMV, ALTREF_FRAME, NONE, 1},
-
-  {NEARMV, GOLDEN_FRAME, NONE, 0},
-  {NEARMV, GOLDEN_FRAME, NONE, 1},
-  {NEARMV, ALTREF_FRAME, NONE, 0},
-  {NEARMV, ALTREF_FRAME, NONE, 1},
-
-  {V_PRED, INTRA_FRAME, NONE, 0},
-  {H_PRED, INTRA_FRAME, NONE, 0},
-  {D45_PRED, INTRA_FRAME, NONE, 0},
-  {D135_PRED, INTRA_FRAME, NONE, 0},
-  {D117_PRED, INTRA_FRAME, NONE, 0},
-  {D153_PRED, INTRA_FRAME, NONE, 0},
-  {D27_PRED, INTRA_FRAME, NONE, 0},
-  {D63_PRED, INTRA_FRAME, NONE, 0},
-
-  {TM_PRED, INTRA_FRAME, NONE, 0},
-
-  {NEWMV, LAST_FRAME, NONE, 0},
-  {NEWMV, LAST_FRAME, NONE, 1},
-  {NEWMV, GOLDEN_FRAME, NONE, 0},
-  {NEWMV, GOLDEN_FRAME, NONE, 1},
-  {NEWMV, ALTREF_FRAME, NONE, 0},
-  {NEWMV, ALTREF_FRAME, NONE, 1},
-
-  {SPLITMV, LAST_FRAME, NONE, 0},
-  {SPLITMV, GOLDEN_FRAME, NONE, 0},
-  {SPLITMV, ALTREF_FRAME, NONE, 0},
-
-  {B_PRED, INTRA_FRAME, NONE, 0},
-  {I8X8_PRED, INTRA_FRAME, NONE, 0},
-
-  /* compound prediction modes */
-  {ZEROMV, LAST_FRAME, GOLDEN_FRAME, 0},
-  {NEARESTMV, LAST_FRAME, GOLDEN_FRAME, 0},
-  {NEARMV, LAST_FRAME, GOLDEN_FRAME, 0},
-
-  {ZEROMV, ALTREF_FRAME, LAST_FRAME, 0},
-  {NEARESTMV, ALTREF_FRAME, LAST_FRAME, 0},
-  {NEARMV, ALTREF_FRAME, LAST_FRAME, 0},
-
-  {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
-  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
-  {NEARMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
-
-  {NEWMV, LAST_FRAME, GOLDEN_FRAME, 0},
-  {NEWMV, ALTREF_FRAME, LAST_FRAME, 0},
-  {NEWMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
-
-  {SPLITMV, LAST_FRAME, GOLDEN_FRAME, 0},
-  {SPLITMV, ALTREF_FRAME, LAST_FRAME, 0},
-  {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
-
-#if CONFIG_COMP_INTERINTRA_PRED
-  /* compound inter-intra prediction */
-  {ZEROMV, LAST_FRAME, INTRA_FRAME, 0},
-  {NEARESTMV, LAST_FRAME, INTRA_FRAME, 0},
-  {NEARMV, LAST_FRAME, INTRA_FRAME, 0},
-  {NEWMV, LAST_FRAME, INTRA_FRAME, 0},
-
-  {ZEROMV, GOLDEN_FRAME, INTRA_FRAME, 0},
-  {NEARESTMV, GOLDEN_FRAME, INTRA_FRAME, 0},
-  {NEARMV, GOLDEN_FRAME, INTRA_FRAME, 0},
-  {NEWMV, GOLDEN_FRAME, INTRA_FRAME, 0},
-
-  {ZEROMV, ALTREF_FRAME, INTRA_FRAME, 0},
-  {NEARESTMV, ALTREF_FRAME, INTRA_FRAME, 0},
-  {NEARMV, ALTREF_FRAME, INTRA_FRAME, 0},
-  {NEWMV, ALTREF_FRAME, INTRA_FRAME, 0},
-#endif
-};
-#else
 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {ZEROMV, LAST_FRAME, NONE},
   {DC_PRED, INTRA_FRAME, NONE},
@@ -238,7 +147,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {NEWMV, ALTREF_FRAME, INTRA_FRAME},
 #endif
 };
-#endif
 
 static void fill_token_costs(vp9_coeff_count *c,
                              vp9_coeff_probs *p,
@@ -2508,9 +2416,9 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
       BLOCK *be = &x->block[i];
       int thisdistortion;
 
-      vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict);
+      vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict4x4);
       if (xd->mode_info_context->mbmi.second_ref_frame > 0)
-        vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg);
+        vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg4x4);
       vp9_subtract_b(be, bd, 16);
       x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
       x->quantize_b_4x4(be, bd);
@@ -3611,12 +3519,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->mv[i].as_int = cur_mv[i].as_int;
   }
 
-#if CONFIG_PRED_FILTER
-  // Filtered prediction:
-  mbmi->pred_filter_enabled = vp9_mode_order[mode_index].pred_filter_flag;
-  *rate2 += vp9_cost_bit(cpi->common.prob_pred_filter_off,
-                         mbmi->pred_filter_enabled);
-#endif
   if (cpi->common.mcomp_filter_type == SWITCHABLE) {
     const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
     const int m = vp9_switchable_interp_map[mbmi->interp_filter];
@@ -3854,9 +3756,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
   int distortion_uv = INT_MAX;
   int64_t best_yrd = LLONG_MAX;
-#if CONFIG_PRED_FILTER
-  int best_filter_state = 0;
-#endif
   int switchable_filter_index = 0;
 
   MB_PREDICTION_MODE uv_intra_mode;
@@ -3969,9 +3868,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     mbmi->uv_mode = DC_PRED;
     mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
     mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
-#if CONFIG_PRED_FILTER
-    mbmi->pred_filter_enabled = 0;
-#endif
 
     // Evaluate all sub-pel filters irrespective of whether we can use
     // them for this frame.
@@ -4396,21 +4292,11 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       best_overall_rd = this_rd;
       best_filter = mbmi->interp_filter;
       best_mode = this_mode;
-#if CONFIG_PRED_FILTER
-      best_filter_state = mbmi->pred_filter_enabled;
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
       is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
 #endif
     }
 
-#if CONFIG_PRED_FILTER
-    // Ignore modes where the prediction filter state doesn't
-    // match the state signaled at the frame level
-    if ((cm->pred_filter_mode == 2) ||
-        (cm->pred_filter_mode ==
-         mbmi->pred_filter_enabled)) {
-#endif
     // Did this mode help.. i.e. is it the new best mode
     if (this_rd < best_rd || x->skip) {
       if (!mode_excluded) {
@@ -4519,21 +4405,11 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     if (x->skip && !mode_excluded)
       break;
   }
-#if CONFIG_PRED_FILTER
-  }
-#endif
 
   assert((cm->mcomp_filter_type == SWITCHABLE) ||
          (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.mode <= B_PRED));
 
-#if CONFIG_PRED_FILTER
-  // Update counts for prediction filter usage
-  if (best_filter_state != 0)
-    ++cpi->pred_filter_on_count;
-  else
-    ++cpi->pred_filter_off_count;
-#endif
 #if CONFIG_COMP_INTERINTRA_PRED
   ++cpi->interintra_select_count[is_best_interintra];
 #endif

From 51bae955e6a554e0e289f211be546555647d49be Mon Sep 17 00:00:00 2001
From: Yaowu Xu
Date: Wed, 19 Dec 2012 15:53:03 -0800
Subject: [PATCH 55/77] Experiment with a wider loop filter for MB borders
 when larger transforms are used

Change-Id: I25251442b44bf251df4c25a1c1fcf71fb2ad913b
---
 configure                           |   1 +
 vp9/common/vp9_loopfilter.c         |  52 ++++--
 vp9/common/vp9_loopfilter.h         |   6 +
 vp9/common/vp9_loopfilter_filters.c | 261 ++++++++++++++++++++++++++--
 4 files changed, 292 insertions(+), 28 deletions(-)

diff --git a/configure b/configure
index 0a5825194..f6afac736 100755
--- a/configure
+++ b/configure
@@ -254,6 +254,7 @@ EXPERIMENT_LIST="
     cnvcontext
     newcoefcontext
     enable_6tap
+    widerlpf
 "
 CONFIG_LIST="
     external_build
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index d91f98ebf..8c13aaf56 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -183,7 +183,15 @@ static int mb_lf_skip(const MB_MODE_INFO *const mbmi) {
   return mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV &&
          tx_size >= TX_16X16 && skip_coef;
 }
-
+static int sb_mb_lf_skip(const MODE_INFO *const mip0,
+                         const MODE_INFO *const mip1) {
+  return mb_lf_skip(&mip0->mbmi) &&
+         mb_lf_skip(&mip1->mbmi) &&
+#if CONFIG_TX32X32
+         mip0->mbmi.txfm_size >= TX_32X32 &&
+#endif
+         mip0->mbmi.ref_frame;
+}
 void vp9_loop_filter_frame(VP9_COMMON *cm,
                            MACROBLOCKD *xd,
                            int frame_filter_level,
@@ -229,15 +237,19 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
         lfi.lim = lfi_n->lim[filter_level];
         lfi.hev_thr = lfi_n->hev_thr[hev_index];
 
-        if (mb_col > 0 &&
-            !((mb_col & 1) && mode_info_context->mbmi.sb_type &&
-              ((skip_lf && mb_lf_skip(&mode_info_context[-1].mbmi))
-#if CONFIG_TX32X32
-               || tx_size == TX_32X32
-#endif
-               )))
-          vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride,
+        if (mb_col > 0
+            && !((mb_col & 1) && mode_info_context->mbmi.sb_type &&
+                 sb_mb_lf_skip(mode_info_context - 1, mode_info_context))
+            ) {
+#if CONFIG_WIDERLPF
+          if (tx_size >= TX_16X16)
+            vp9_lpf_mbv_w(y_ptr, u_ptr, v_ptr, post->y_stride,
                               post->uv_stride, &lfi);
+          else
+#endif
+            vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride,
+                                post->uv_stride, &lfi);
+        }
 
         if (!skip_lf) {
           if (tx_size >= TX_8X8)
            vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
@@ -248,19 +260,23 @@ void vp9_loop_filter_frame(VP9_COMMON *cm,
         }
 
         /* don't apply across umv border */
-        if (mb_row > 0 &&
-            !((mb_row & 1) && mode_info_context->mbmi.sb_type &&
-              ((skip_lf && mb_lf_skip(&mode_info_context[-mis].mbmi))
-#if CONFIG_TX32X32
-               || tx_size == TX_32X32
-#endif
-               )))
-          vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride,
+        if (mb_row > 0
+            && !((mb_row & 1) && mode_info_context->mbmi.sb_type &&
+                 sb_mb_lf_skip(mode_info_context - mis, mode_info_context))
+            ) {
+#if CONFIG_WIDERLPF
+          if (tx_size >= TX_16X16)
+            vp9_lpf_mbh_w(y_ptr, u_ptr, v_ptr, post->y_stride,
                               post->uv_stride, &lfi);
+          else
+#endif
+            vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride,
+                                post->uv_stride, &lfi);
+        }
 
         if (!skip_lf) {
           if (tx_size >= TX_8X8)
             vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride,
-                                  post->uv_stride, &lfi);
+                                  post->uv_stride, &lfi);
           else
             vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, post->y_stride,
                                post->uv_stride, &lfi);
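Two details of the loop-filter hunk above are easy to misread. First, `sb_mb_lf_skip` ends with a bare `mip0->mbmi.ref_frame` test: in MV_REFERENCE_FRAME, INTRA_FRAME is 0, so this is an "is inter coded" check. Second, the wider filter only ever replaces the macroblock-edge filter (`vp9_loop_filter_mbv`/`mbh`), never the internal `bv`/`bh` block filters. A hedged restatement of the vertical-edge dispatch (helper name hypothetical; types from vp9_blockd.h/vp9_loopfilter.h assumed):

#include "vp9/common/vp9_loopfilter.h"

/* Sketch, not part of the patch: per-MB vertical-edge filter selection
 * after this change. With a 16x16 or larger transform the wide 15-tap
 * path is used; otherwise the pre-existing MB filter runs. */
static void filter_mb_vertical_edge(unsigned char *y, unsigned char *u,
                                    unsigned char *v, int y_stride,
                                    int uv_stride,
                                    struct loop_filter_info *lfi,
                                    int tx_size) {
#if CONFIG_WIDERLPF
  if (tx_size >= TX_16X16)
    vp9_lpf_mbv_w(y, u, v, y_stride, uv_stride, lfi);
  else
#endif
    vp9_loop_filter_mbv(y, u, v, y_stride, uv_stride, lfi);
}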
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index dbe8e1f83..8b752aa5c 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -92,4 +92,10 @@ void vp9_loop_filter_partial_frame(struct VP9Common *cm,
 void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
                                       int sharpness_lvl);
 
+extern void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr,
+                          unsigned char *v_ptr, int y_stride, int uv_stride,
+                          struct loop_filter_info *lfi);
+extern void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr,
+                          unsigned char *v_ptr, int y_stride, int uv_stride,
+                          struct loop_filter_info *lfi);
 #endif  // VP9_COMMON_VP9_LOOPFILTER_H_
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index 1419c9960..c73e37ba3 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -149,14 +149,14 @@ static __inline signed char flatmask(uint8_t thresh,
                                      uint8_t q0, uint8_t q1,
                                      uint8_t q2, uint8_t q3, uint8_t q4) {
   int8_t flat = 0;
-  flat |= (abs(p1 - p0) > 1) * -1;
-  flat |= (abs(q1 - q0) > 1) * -1;
-  flat |= (abs(p0 - p2) > 1) * -1;
-  flat |= (abs(q0 - q2) > 1) * -1;
-  flat |= (abs(p3 - p0) > 1) * -1;
-  flat |= (abs(q3 - q0) > 1) * -1;
-  flat |= (abs(p4 - p0) > 1) * -1;
-  flat |= (abs(q4 - q0) > 1) * -1;
+  flat |= (abs(p1 - p0) > thresh) * -1;
+  flat |= (abs(q1 - q0) > thresh) * -1;
+  flat |= (abs(p0 - p2) > thresh) * -1;
+  flat |= (abs(q0 - q2) > thresh) * -1;
+  flat |= (abs(p3 - p0) > thresh) * -1;
+  flat |= (abs(q3 - q0) > thresh) * -1;
+  flat |= (abs(p4 - p0) > thresh) * -1;
+  flat |= (abs(q4 - q0) > thresh) * -1;
   flat = ~flat;
   return flat;
 }
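The hunk above makes `flatmask` honor its `thresh` parameter (previously the threshold was hard-coded to 1), and the callers changed later in this patch now pass 1 explicitly, so behavior is unchanged while the function becomes reusable. Its return convention is an all-ones mask (-1) for "flat", 0 otherwise: `flat` stays 0 only if every neighbor is within `thresh` of the edge pixels p0/q0, and the final `~flat` inverts that. A minimal in-file sanity test (assumes it is compiled in the same translation unit as the static `flatmask` above, with the parameter order thresh, p4..p0, q0..q4 implied by the call sites; not part of the patch):

#include <assert.h>

static void test_flatmask(void) {
  /* every neighbor within thresh = 1 of p0/q0 -> all-ones "flat" mask */
  assert(flatmask(1, 80, 80, 81, 80, 80, 80, 81, 80, 80, 80) == (int8_t)~0);
  /* one neighbor 3 away from q0 -> not flat */
  assert(flatmask(1, 80, 80, 81, 80, 80, 80, 83, 80, 80, 80) == 0);
}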
else { + signed char ps0, qs0; + signed char ps1, qs1; + signed char filter, Filter1, Filter2; + signed char u; + + ps1 = (signed char) * op1 ^ 0x80; + ps0 = (signed char) * op0 ^ 0x80; + qs0 = (signed char) * oq0 ^ 0x80; + qs1 = (signed char) * oq1 ^ 0x80; + + /* add outer taps if we have high edge variance */ + filter = signed_char_clamp(ps1 - qs1); + filter &= hev; + + /* inner taps */ + filter = signed_char_clamp(filter + 3 * (qs0 - ps0)); + filter &= mask; + + Filter1 = signed_char_clamp(filter + 4); + Filter2 = signed_char_clamp(filter + 3); + Filter1 >>= 3; + Filter2 >>= 3; + + u = signed_char_clamp(qs0 - Filter1); + *oq0 = u ^ 0x80; + u = signed_char_clamp(ps0 + Filter2); + *op0 = u ^ 0x80; + filter = Filter1; + + /* outer tap adjustments */ + filter += 1; + filter >>= 1; + filter &= ~hev; + + u = signed_char_clamp(qs1 - filter); + *oq1 = u ^ 0x80; + u = signed_char_clamp(ps1 + filter); + *op1 = u ^ 0x80; + } +} + +static void vp9_mb_lpf_horizontal_edge_w +( + unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) { + signed char hev = 0; /* high edge variance */ + signed char mask = 0; + signed char flat = 0; + signed char flat2 = 0; + int i = 0; + + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ + do { + mask = filter_mask(limit[0], blimit[0], + s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], + s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]); + + hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]); + + flat = flatmask(1, + s[-5 * p], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], + s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p], s[ 4 * p]); + + flat2 = flatmask(1, + s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], s[-1 * p], + s[ 0 * p], s[ 4 * p], s[ 5 * p], s[ 6 * p], s[ 7 * p]); + + wide_mbfilter(mask, hev, flat, flat2, + s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, + s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, + s, s + 1 * p, s + 2 * p, s + 3 * p, + s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p); + + ++s; + } while (++i < count * 8); +} +void vp9_mb_lpf_vertical_edge_w +( + unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count +) { + signed char hev = 0; /* high edge variance */ + signed char mask = 0; + signed char flat = 0; + signed char flat2 = 0; + int i = 0; + + do { + mask = filter_mask(limit[0], blimit[0], + s[-4], s[-3], s[-2], s[-1], + s[0], s[1], s[2], s[3]); + + hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); + flat = flatmask(1, + s[-5], s[-4], s[-3], s[-2], s[-1], + s[ 0], s[ 1], s[ 2], s[ 3], s[ 4]); + flat2 = flatmask(1, + s[-8], s[-7], s[-6], s[-5], s[-1], + s[ 0], s[ 4], s[ 5], s[ 6], s[ 7]); + + wide_mbfilter(mask, hev, flat, flat2, + s - 8, s - 7, s - 6, s - 5, + s - 4, s - 3, s - 2, s - 1, + s, s + 1, s + 2, s + 3, + s + 4, s + 5, s + 6, s + 7); + s += p; + } while (++i < count * 8); +} + +void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + struct loop_filter_info *lfi) { + vp9_mb_lpf_vertical_edge_w(y_ptr, y_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp9_mb_lpf_vertical_edge_w(u_ptr, uv_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp9_mb_lpf_vertical_edge_w(v_ptr, uv_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} +void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int 
uv_stride, + struct loop_filter_info *lfi) { + vp9_mb_lpf_horizontal_edge_w(y_ptr, y_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp9_mb_lpf_horizontal_edge_w(u_ptr, uv_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp9_mb_lpf_horizontal_edge_w(v_ptr, uv_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 1); +} + +#endif From 6884a83f06dd6c19ffc16758ecf1a5b0f70ec1a8 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 9 Jan 2013 17:21:28 -0800 Subject: [PATCH 56/77] Merge superblocks64 experiment. Change-Id: If6c88752dffdb566f8d4322f135145270716fb8e --- configure | 2 -- vp9/common/vp9_blockd.h | 2 -- vp9/common/vp9_findnearmv.c | 8 ------ vp9/common/vp9_onyxc_int.h | 2 -- vp9/common/vp9_reconinter.c | 2 -- vp9/common/vp9_reconinter.h | 2 -- vp9/common/vp9_reconintra.c | 6 ----- vp9/common/vp9_rtcd_defs.sh | 4 --- vp9/decoder/vp9_decodframe.c | 19 +++----------- vp9/encoder/vp9_bitstream.c | 7 +---- vp9/encoder/vp9_block.h | 2 -- vp9/encoder/vp9_encodeframe.c | 29 +++----------------- vp9/encoder/vp9_onyx_if.c | 4 --- vp9/encoder/vp9_onyx_int.h | 4 --- vp9/encoder/vp9_rdopt.c | 48 +++++----------------------------- vp9/encoder/vp9_segmentation.c | 5 +--- vp9/encoder/vp9_variance_c.c | 14 ---------- 17 files changed, 14 insertions(+), 146 deletions(-) diff --git a/configure b/configure index 0a5825194..8dc44fca3 100755 --- a/configure +++ b/configure @@ -239,8 +239,6 @@ HAVE_LIST=" EXPERIMENT_LIST=" csm comp_intra_pred - superblocks - superblocks64 pred_filter lossless subpelrefmv diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 26f4a2ff1..5de6d2ecc 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -232,9 +232,7 @@ typedef enum { typedef enum { BLOCK_SIZE_MB16X16 = 0, BLOCK_SIZE_SB32X32 = 1, -#if CONFIG_SUPERBLOCKS64 BLOCK_SIZE_SB64X64 = 2, -#endif } BLOCK_SIZE_TYPE; typedef struct { diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index b92e7d58b..92e0a0603 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -198,7 +198,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src + 16, xd->dst.y_stride, &sse); score += sse; } -#if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { vp9_sub_pixel_variance16x2(above_ref + offset + 32, ref_y_stride, @@ -213,7 +212,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src + 48, xd->dst.y_stride, &sse); score += sse; } -#endif } if (xd->left_available) { vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride, @@ -230,7 +228,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, xd->dst.y_stride, &sse); score += sse; } -#if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32, ref_y_stride, @@ -247,7 +244,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, xd->dst.y_stride, &sse); score += sse; } -#endif } #else row_offset = (this_mv.as_mv.row > 0) ? 
@@ -263,14 +259,12 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += vp9_sad16x3(above_src + 16, xd->dst.y_stride, above_ref + offset + 16, ref_y_stride); } -#if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { score += vp9_sad16x3(above_src + 32, xd->dst.y_stride, above_ref + offset + 32, ref_y_stride); score += vp9_sad16x3(above_src + 48, xd->dst.y_stride, above_ref + offset + 48, ref_y_stride); } -#endif } if (xd->left_available) { score += vp9_sad3x16(left_src, xd->dst.y_stride, @@ -281,7 +275,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, left_ref + offset + ref_y_stride * 16, ref_y_stride); } -#if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { score += vp9_sad3x16(left_src + xd->dst.y_stride * 32, xd->dst.y_stride, @@ -292,7 +285,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, left_ref + offset + ref_y_stride * 48, ref_y_stride); } -#endif } #endif // Add the entry to our list and then resort the list on score. diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 440125f91..1dbe7f9f8 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -240,9 +240,7 @@ typedef struct VP9Common { vp9_prob prob_last_coded; vp9_prob prob_gf_coded; vp9_prob sb32_coded; -#if CONFIG_SUPERBLOCKS64 vp9_prob sb64_coded; -#endif // CONFIG_SUPERBLOCKS64 // Context probabilities when using predictive coding of segment id vp9_prob segment_pred_probs[PREDICTION_PROBS]; diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index e6561128c..20de7b7f1 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -614,7 +614,6 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, #endif } -#if CONFIG_SUPERBLOCKS64 void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, @@ -678,7 +677,6 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, } #endif } -#endif // CONFIG_SUPERBLOCKS64 /* * The following functions should be called after an initial diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 12b4b2514..89868b95e 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -54,14 +54,12 @@ extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, int dst_ystride, int dst_uvstride); -#if CONFIG_SUPERBLOCKS64 extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, uint8_t *dst_y, uint8_t *dst_u, uint8_t *dst_v, int dst_ystride, int dst_uvstride); -#endif // CONFIG_SUPERBLOCKS64 extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index 0504edaeb..eb99285f4 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -703,7 +703,6 @@ void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride); } -#if CONFIG_SUPERBLOCKS64 void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, int ystride) { @@ -744,7 +743,6 @@ void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, vp9_build_interintra_64x64_predictors_sby(xd, ypred, ystride); vp9_build_interintra_64x64_predictors_sbuv(xd, upred, vpred, uvstride); } -#endif // CONFIG_SUPERBLOCKS64 #endif // CONFIG_COMP_INTERINTRA_PRED void vp9_build_intra_predictors_mby(MACROBLOCKD *xd) { @@ -768,14 +766,12 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { xd->up_available, xd->left_available); } -#if CONFIG_SUPERBLOCKS64 void 
vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) { vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride, xd->mode_info_context->mbmi.mode, 64, xd->up_available, xd->left_available); } -#endif // CONFIG_SUPERBLOCKS64 #if CONFIG_COMP_INTRA_PRED void vp9_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { @@ -834,14 +830,12 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { 16); } -#if CONFIG_SUPERBLOCKS64 void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->mode_info_context->mbmi.uv_mode, 32); } -#endif // CONFIG_SUPERBLOCKS64 #if CONFIG_COMP_INTRA_PRED void vp9_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 329c0929e..9d1f51568 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -172,16 +172,12 @@ specialize vp9_build_intra_predictors_mbuv_s; prototype void vp9_build_comp_intra_predictors_mbuv "struct macroblockd *x" specialize vp9_build_comp_intra_predictors_mbuv; -if [ "$CONFIG_SUPERBLOCKS64" = "yes" ]; then - prototype void vp9_build_intra_predictors_sb64y_s "struct macroblockd *x" specialize vp9_build_intra_predictors_sb64y_s; prototype void vp9_build_intra_predictors_sb64uv_s "struct macroblockd *x" specialize vp9_build_intra_predictors_sb64uv_s; -fi - prototype void vp9_intra4x4_predict "struct blockd *x, int b_mode, uint8_t *predictor" specialize vp9_intra4x4_predict; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 47d23ecbd..8d33dbf78 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -172,13 +172,10 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { */ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { -#if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { vp9_build_intra_predictors_sb64uv_s(xd); vp9_build_intra_predictors_sb64y_s(xd); - } else -#endif // CONFIG_SUPERBLOCKS64 - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { vp9_build_intra_predictors_sbuv_s(xd); vp9_build_intra_predictors_sby_s(xd); } else { @@ -186,7 +183,6 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { vp9_build_intra_predictors_mby_s(xd); } } else { -#if CONFIG_SUPERBLOCKS64 if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, @@ -194,9 +190,7 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } else -#endif // CONFIG_SUPERBLOCKS64 - if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, @@ -701,7 +695,6 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.uv_stride, xd->eobs + 16, xd); }; -#if CONFIG_SUPERBLOCKS64 static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, unsigned int mb_col, BOOL_DECODER* const bc) { @@ -831,7 +824,6 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->left_context = pc->left_context; xd->mode_info_context = orig_mi; } -#endif // CONFIG_SUPERBLOCKS64 static void 
decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, unsigned int mb_col, @@ -1184,16 +1176,13 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); for (mb_col = 0; mb_col < pc->mb_cols; mb_col += 4) { -#if CONFIG_SUPERBLOCKS64 if (vp9_read(bc, pc->sb64_coded)) { set_offsets(pbi, 64, mb_row, mb_col); vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc); set_refs(pbi, 64, mb_row, mb_col); decode_superblock64(pbi, xd, mb_row, mb_col, bc); xd->corrupted |= bool_error(bc); - } else -#endif // CONFIG_SUPERBLOCKS64 - { + } else { int j; for (j = 0; j < 4; j++) { @@ -1596,9 +1585,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { } } -#if CONFIG_SUPERBLOCKS64 pc->sb64_coded = vp9_read_literal(&header_bc, 8); -#endif pc->sb32_coded = vp9_read_literal(&header_bc, 8); /* Read the loop filter level and type */ diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index a7dac74e3..ba06bffbf 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1122,13 +1122,10 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) { for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) { m = m_ptr; for (mb_col = 0; mb_col < c->mb_cols; mb_col += 4, m += 4) { -#if CONFIG_SUPERBLOCKS64 vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded); if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) { write_modes_b(cpi, m, bc, &tok, tok_end, mb_row, mb_col); - } else -#endif - { + } else { int j; for (j = 0; j < 4; j++) { @@ -1689,10 +1686,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } } -#if CONFIG_SUPERBLOCKS64 pc->sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]); vp9_write_literal(&header_bc, pc->sb64_coded, 8); -#endif pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]); vp9_write_literal(&header_bc, pc->sb32_coded, 8); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 18e5f73ff..f856daddf 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -180,9 +180,7 @@ typedef struct macroblock { PICK_MODE_CONTEXT mb_context[4][4]; // when 4 MBs share coding parameters: PICK_MODE_CONTEXT sb32_context[4]; -#if CONFIG_SUPERBLOCKS64 PICK_MODE_CONTEXT sb64_context; -#endif // CONFIG_SUPERBLOCKS64 void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch); void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index d8478a15b..0abf22cff 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -888,7 +888,6 @@ static void pick_sb_modes(VP9_COMP *cpi, } } -#if CONFIG_SUPERBLOCKS64 static void pick_sb64_modes(VP9_COMP *cpi, int mb_row, int mb_col, @@ -924,7 +923,6 @@ static void pick_sb64_modes(VP9_COMP *cpi, totaldist); } } -#endif // CONFIG_SUPERBLOCKS64 static void update_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1054,7 +1052,6 @@ static void encode_sb(VP9_COMP *cpi, #endif } -#if CONFIG_SUPERBLOCKS64 static void encode_sb64(VP9_COMP *cpi, int mb_row, int mb_col, @@ -1094,7 +1091,6 @@ static void encode_sb64(VP9_COMP *cpi, } } } -#endif // CONFIG_SUPERBLOCKS64 static void encode_sb_row(VP9_COMP *cpi, int mb_row, @@ -1114,14 +1110,12 @@ static void encode_sb_row(VP9_COMP *cpi, int i; int sb32_rate = 0, sb32_dist = 0; int is_sb[4]; -#if CONFIG_SUPERBLOCKS64 int sb64_rate = INT_MAX, sb64_dist; ENTROPY_CONTEXT_PLANES l[4], a[4]; 
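The saved entropy contexts declared above (l, a) and the token-pointer snapshot just below support the two-pass shape of encode_sb_row completed in the following hunks: the region is first coded as four 32x32 superblocks, the contexts are rewound, the same region is evaluated as one 64x64 superblock, and the cheaper rate/distortion total wins. A sketch of that comparison, modeled on the shape of the encoder's RDCOST macro; the helper names are illustrative:

#include <stdint.h>

/* RD cost: rate scaled by rdmult/256 with rounding, distortion shifted
 * by rddiv; modeled on the libvpx RDCOST macro. */
static int64_t rd_cost(int rdmult, int rddiv, int rate, int64_t dist) {
  return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
}

/* Keep the 64x64 coding only if strictly cheaper; both rates are assumed
 * to already include the cost of signaling the sb64_coded flag. */
static int use_sb64(int rdmult, int rddiv,
                    int sb64_rate, int64_t sb64_dist,
                    int sb32_rate, int64_t sb32_dist) {
  return rd_cost(rdmult, rddiv, sb64_rate, sb64_dist) <
         rd_cost(rdmult, rddiv, sb32_rate, sb32_dist);
}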
TOKENEXTRA *tp_orig = *tp; memcpy(&a, cm->above_context + mb_col, sizeof(a)); memcpy(&l, cm->left_context, sizeof(l)); -#endif // CONFIG_SUPERBLOCKS64 for (i = 0; i < 4; i++) { const int x_idx = (i & 1) << 1, y_idx = i & 2; int mb_rate = 0, mb_dist = 0; @@ -1163,11 +1157,9 @@ static void encode_sb_row(VP9_COMP *cpi, // pixels of the lower level; also, inverting SB/MB order (big->small // instead of small->big) means we can use as threshold for small, which // may enable breakouts if RD is not good enough (i.e. faster) - encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, - !CONFIG_SUPERBLOCKS64, tp, is_sb[i]); + encode_sb(cpi, mb_row + y_idx, mb_col + x_idx, 0, tp, is_sb[i]); } -#if CONFIG_SUPERBLOCKS64 memcpy(cm->above_context + mb_col, &a, sizeof(a)); memcpy(cm->left_context, &l, sizeof(l)); sb32_rate += vp9_cost_bit(cm->sb64_coded, 0); @@ -1184,17 +1176,13 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { is_sb[0] = 2; *totalrate += sb64_rate; - } else -#endif - { + } else { *totalrate += sb32_rate; } -#if CONFIG_SUPERBLOCKS64 assert(tp_orig == *tp); encode_sb64(cpi, mb_row, mb_col, tp, is_sb); assert(tp_orig < *tp); -#endif // CONFIG_SUPERBLOCKS64 } } @@ -1244,9 +1232,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_zero(cpi->common.fc.mv_ref_ct) vp9_zero(cpi->sb_ymode_count) vp9_zero(cpi->sb32_count); -#if CONFIG_SUPERBLOCKS64 vp9_zero(cpi->sb64_count); -#endif // CONFIG_SUPERBLOCKS64 #if CONFIG_COMP_INTERINTRA_PRED vp9_zero(cpi->interintra_count); vp9_zero(cpi->interintra_select_count); @@ -1458,7 +1444,6 @@ static void reset_skip_txfm_size_sb32(VP9_COMP *cpi, MODE_INFO *mi, } } -#if CONFIG_SUPERBLOCKS64 static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi, int mis, TX_SIZE txfm_max, int mb_rows_left, int mb_cols_left) { @@ -1479,7 +1464,6 @@ static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi, set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max); } } -#endif static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { VP9_COMMON *const cm = &cpi->common; @@ -1490,13 +1474,10 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { mi = mi_ptr; for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) { -#if CONFIG_SUPERBLOCKS64 if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { reset_skip_txfm_size_sb64(cpi, mi, mis, txfm_max, cm->mb_rows - mb_row, cm->mb_cols - mb_col); - } else -#endif // CONFIG_SUPERBLOCKS64 - { + } else { int i; for (i = 0; i < 4; i++) { @@ -1924,7 +1905,6 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, } } -#if CONFIG_SUPERBLOCKS64 static void update_sb64_skip_coeff_state(VP9_COMP *cpi, ENTROPY_CONTEXT_PLANES ta[16], ENTROPY_CONTEXT_PLANES tl[16], @@ -2038,7 +2018,6 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi, } } } -#endif // CONFIG_SUPERBLOCKS64 static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset, @@ -2541,7 +2520,6 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, } } -#if CONFIG_SUPERBLOCKS64 static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset, int output_enabled, int mb_row, int mb_col) { @@ -2823,4 +2801,3 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, } } } -#endif // CONFIG_SUPERBLOCKS64 diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index f08ea6feb..6a4c62496 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ 
b/vp9/encoder/vp9_onyx_if.c @@ -1618,9 +1618,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cm->prob_gf_coded = 128; cm->prob_intra_coded = 63; cm->sb32_coded = 200; -#if CONFIG_SUPERBLOCKS64 cm->sb64_coded = 200; -#endif for (i = 0; i < COMP_PRED_CONTEXTS; i++) cm->prob_comppred[i] = 128; for (i = 0; i < TX_SIZE_MAX_SB - 1; i++) @@ -1825,12 +1823,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8, vp9_sad32x32x4d) -#if CONFIG_SUPERBLOCKS64 BFP(BLOCK_64X64, vp9_sad64x64, vp9_variance64x64, vp9_sub_pixel_variance64x64, vp9_variance_halfpixvar64x64_h, vp9_variance_halfpixvar64x64_v, vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8, vp9_sad64x64x4d) -#endif BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16, vp9_variance_halfpixvar16x16_h, vp9_variance_halfpixvar16x16_v, diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index d917c0e7f..403335047 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -292,9 +292,7 @@ enum BlockSize { BLOCK_16X16, BLOCK_MAX_SEGMENTS, BLOCK_32X32 = BLOCK_MAX_SEGMENTS, -#if CONFIG_SUPERBLOCKS64 BLOCK_64X64, -#endif // CONFIG_SUPERBLOCKS64 BLOCK_MAX_SB_SEGMENTS, }; @@ -475,9 +473,7 @@ typedef struct VP9_COMP { int cq_target_quality; int sb32_count[2]; -#if CONFIG_SUPERBLOCKS64 int sb64_count[2]; -#endif int sb_ymode_count [VP9_I32X32_MODES]; int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */ int bmode_count[VP9_NKF_BINTRAMODES]; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7c8f41d84..a8ece2c58 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -982,7 +982,6 @@ static void super_block_yrd(VP9_COMP *cpi, xd->left_context = orig_left; } -#if CONFIG_SUPERBLOCKS64 static void super_block_64_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int *skip, @@ -1080,7 +1079,6 @@ static void super_block_64_yrd(VP9_COMP *cpi, xd->above_context = orig_above; xd->left_context = orig_left; } -#endif // CONFIG_SUPERBLOCKS64 static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { const unsigned int *p = (const unsigned int *)predictor; @@ -1374,7 +1372,6 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, return best_rd; } -#if CONFIG_SUPERBLOCKS64 static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -1414,7 +1411,6 @@ static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, return best_rd; } -#endif // CONFIG_SUPERBLOCKS64 static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -1893,7 +1889,6 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -#if CONFIG_SUPERBLOCKS64 static void super_block_64_uvrd(MACROBLOCK *x, int *rate, int *distortion, int *skip); static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -1901,7 +1896,6 @@ static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, super_block_64_uvrd(x, rate, distortion, skip); return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -#endif // CONFIG_SUPERBLOCKS64 static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int *skip, int fullpixel) { @@ -2102,7 +2096,6 @@ static void super_block_uvrd(MACROBLOCK *x, } } -#if CONFIG_SUPERBLOCKS64 static void super_block_64_uvrd(MACROBLOCK *x, int *rate, int *distortion, @@ -2184,7 +2177,6 @@ static void super_block_64_uvrd(MACROBLOCK *x, 
xd->left_context = tl_orig; xd->above_context = ta_orig; } -#endif // CONFIG_SUPERBLOCKS64 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -2223,7 +2215,6 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, return best_rd; } -#if CONFIG_SUPERBLOCKS64 static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -2260,7 +2251,6 @@ static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, return best_rd; } -#endif // CONFIG_SUPERBLOCKS64 int vp9_cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE m, @@ -3548,7 +3538,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } #endif -#if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, @@ -3556,9 +3545,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } else -#endif // CONFIG_SUPERBLOCKS64 - if (block_size == BLOCK_32X32) { + } else if (block_size == BLOCK_32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, @@ -3587,13 +3574,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (threshold < x->encode_breakout) threshold = x->encode_breakout; -#if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { var = vp9_variance64x64(*(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); - } else -#endif // CONFIG_SUPERBLOCKS64 - if (block_size == BLOCK_32X32) { + } else if (block_size == BLOCK_32X32) { var = vp9_variance32x32(*(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); } else { @@ -3611,7 +3595,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Check u and v to make sure skip is ok int sse2; -#if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { unsigned int sse2u, sse2v; var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride, @@ -3619,9 +3602,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); sse2 = sse2u + sse2v; - } else -#endif // CONFIG_SUPERBLOCKS64 - if (block_size == BLOCK_32X32) { + } else if (block_size == BLOCK_32X32) { unsigned int sse2u, sse2v; var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); @@ -3661,7 +3642,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (!x->skip) { -#if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { int skippable_y, skippable_uv; @@ -3677,9 +3657,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += *rate_uv; *distortion += *distortion_uv; *skippable = skippable_y && skippable_uv; - } else -#endif // CONFIG_SUPERBLOCKS64 - if (block_size == BLOCK_32X32) { + } else if (block_size == BLOCK_32X32) { int skippable_y, skippable_uv; // Y cost and distortion @@ -4540,7 +4518,6 @@ void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, } } -#if CONFIG_SUPERBLOCKS64 void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist) { @@ -4569,7 +4546,6 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, *returndist = dist_y + (dist_uv >> 2); } } -#endif // CONFIG_SUPERBLOCKS64 void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist) { @@ -4800,7 +4776,6 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } -#if CONFIG_SUPERBLOCKS64 if 
(block_size == BLOCK_64X64) { mbmi->mode = DC_PRED; if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { @@ -4824,9 +4799,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_uv_16x16 = mbmi->uv_mode; } #endif // CONFIG_TX32X32 - } else -#endif // CONFIG_SUPERBLOCKS64 - { + } else { assert(block_size == BLOCK_32X32); mbmi->mode = DC_PRED; if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) { @@ -4977,14 +4950,11 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (ref_frame == INTRA_FRAME) { -#if CONFIG_SUPERBLOCKS64 if (block_size == BLOCK_64X64) { vp9_build_intra_predictors_sb64y_s(xd); super_block_64_yrd(cpi, x, &rate_y, &distortion_y, &skippable, txfm_cache); - } else -#endif // CONFIG_SUPERBLOCKS64 - { + } else { assert(block_size == BLOCK_32X32); vp9_build_intra_predictors_sby_s(xd); super_block_yrd(cpi, x, &rate_y, &distortion_y, @@ -5296,13 +5266,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, end: { -#if CONFIG_SUPERBLOCKS64 PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ? &x->sb32_context[xd->sb_index] : &x->sb64_context; -#else - PICK_MODE_CONTEXT *p = &x->sb32_context[xd->sb_index]; -#endif store_coding_context(x, p, best_mode_index, NULL, &mbmi->ref_mvs[mbmi->ref_frame][0], &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 : @@ -5321,7 +5287,6 @@ int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, returnrate, returndistortion, BLOCK_32X32); } -#if CONFIG_SUPERBLOCKS64 int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, @@ -5329,7 +5294,6 @@ int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset, returnrate, returndistortion, BLOCK_64X64); } -#endif // CONFIG_SUPERBLOCKS64 void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, int recon_yoffset, diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 2ef7be2a3..49195e80c 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -221,13 +221,10 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { mi = mi_ptr; for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) { -#if CONFIG_SUPERBLOCKS64 if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, 4, mb_row, mb_col); - } else -#endif - { + } else { for (i = 0; i < 4; i++) { int x_idx = (i & 1) << 1, y_idx = i & 2; MODE_INFO *sb_mi = mi + y_idx * mis + x_idx; diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index 4eee6ae56..d03e285c6 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -24,7 +24,6 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { return sum; } -#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -37,7 +36,6 @@ unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, *sse = var; return (var - (((int64_t)avg * avg) >> 12)); } -#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, @@ -197,7 +195,6 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } -#if CONFIG_SUPERBLOCKS64 unsigned int 
vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -218,7 +215,6 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse); } -#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_pixels_per_line, @@ -258,7 +254,6 @@ unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -267,8 +262,6 @@ unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, ref_ptr, recon_stride, sse); } -#endif // CONFIG_SUPERBLOCKS64 - unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, @@ -288,7 +281,6 @@ unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -297,8 +289,6 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, ref_ptr, recon_stride, sse); } -#endif // #if CONFIG_SUPERBLOCKS64 - unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, @@ -318,7 +308,6 @@ unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, ref_ptr, recon_stride, sse); } -#if CONFIG_SUPERBLOCKS64 unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -327,7 +316,6 @@ unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, ref_ptr, recon_stride, sse); } -#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, int src_pixels_per_line, @@ -355,7 +343,6 @@ unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, return *sse; } -#if CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, @@ -368,7 +355,6 @@ unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, dst_pixels_per_line, sse); return *sse; } -#endif // CONFIG_SUPERBLOCKS64 unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int src_pixels_per_line, From aa2effa954782b04db3acc93c1daeaa2c2ba1553 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 10 Jan 2013 08:23:59 -0800 Subject: [PATCH 57/77] Merge tx32x32 experiment. 
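An aside on the variance helpers un-gated just above, before the tx32x32 patch body: vp9_variance64x64_c returns var - (((int64_t)avg * avg) >> 12), which is the textbook sum_sq - sum*sum/N with N = 64*64 = 4096 folded into the shift. A scalar sketch of the same arithmetic (function name hypothetical):

#include <stdint.h>

static unsigned int variance64x64_sketch(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sum_sq = 0;
  int r, c;
  for (r = 0; r < 64; ++r) {
    for (c = 0; c < 64; ++c) {
      const int d = src[r * src_stride + c] - ref[r * ref_stride + c];
      sum += d;
      sum_sq += (uint64_t)(d * d);
    }
  }
  *sse = (unsigned int)sum_sq;
  /* sum*sum >> 12 is sum*sum / 4096; never exceeds sum_sq (Cauchy-Schwarz) */
  return (unsigned int)(sum_sq - (uint64_t)((sum * sum) >> 12));
}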
Change-Id: I615651e4c7b09e576a341ad425cf80c393637833 --- configure | 1 - test/dct32x32_test.cc | 6 +-- test/test.mk | 4 +- vp9/common/vp9_blockd.c | 4 -- vp9/common/vp9_blockd.h | 8 ---- vp9/common/vp9_default_coef_probs.h | 2 - vp9/common/vp9_entropy.c | 14 +------ vp9/common/vp9_entropy.h | 8 ---- vp9/common/vp9_idctllm.c | 2 - vp9/common/vp9_invtrans.c | 2 - vp9/common/vp9_invtrans.h | 2 - vp9/common/vp9_loopfilter.c | 2 - vp9/common/vp9_onyxc_int.h | 12 +----- vp9/common/vp9_recon.c | 2 - vp9/common/vp9_rtcd_defs.sh | 8 ---- vp9/decoder/vp9_decodemv.c | 8 ---- vp9/decoder/vp9_decodframe.c | 19 +--------- vp9/decoder/vp9_dequantize.c | 2 - vp9/decoder/vp9_detokenize.c | 4 -- vp9/decoder/vp9_detokenize.h | 2 - vp9/encoder/vp9_bitstream.c | 24 ------------ vp9/encoder/vp9_block.h | 10 ----- vp9/encoder/vp9_dct.c | 6 --- vp9/encoder/vp9_encodeframe.c | 59 ++++------------------------- vp9/encoder/vp9_encodemb.c | 4 -- vp9/encoder/vp9_encodemb.h | 4 -- vp9/encoder/vp9_onyx_if.c | 2 - vp9/encoder/vp9_onyx_int.h | 6 --- vp9/encoder/vp9_quantize.c | 16 -------- vp9/encoder/vp9_quantize.h | 2 - vp9/encoder/vp9_ratectrl.c | 4 -- vp9/encoder/vp9_rdopt.c | 58 +++++----------------------- vp9/encoder/vp9_tokenize.c | 46 ---------------------- vp9/encoder/vp9_tokenize.h | 6 --- 34 files changed, 24 insertions(+), 335 deletions(-) diff --git a/configure b/configure index ab92df328..5506aec45 100755 --- a/configure +++ b/configure @@ -246,7 +246,6 @@ EXPERIMENT_LIST=" implicit_segmentation newbintramodes comp_interintra_pred - tx32x32 tx64x64 dwtdcthybrid cnvcontext diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 1ac6fb0a9..f0623eb32 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -36,7 +36,7 @@ static int round(double x) { } #endif -#if !CONFIG_DWT32X32HYBRID +#if !CONFIG_DWTDCTHYBRID static const double kPi = 3.141592653589793238462643383279502884; static void reference2_32x32_idct_2d(double *input, double *output) { double x; @@ -127,9 +127,9 @@ TEST(VP9Idct32x32Test, AccuracyCheck) { } } } -#else // CONFIG_DWT32X32HYBRID +#else // CONFIG_DWTDCTHYBRID // TODO(rbultje/debargha): add DWT-specific tests -#endif // CONFIG_DWT32X32HYBRID +#endif // CONFIG_DWTDCTHYBRID TEST(VP9Fdct32x32Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); unsigned int max_error = 0; diff --git a/test/test.mk b/test/test.mk index 28d387264..e7a1dcd34 100644 --- a/test/test.mk +++ b/test/test.mk @@ -69,9 +69,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc -ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_TX32X32),yesyes) -LIBVPX_TEST_SRCS-yes += dct32x32_test.cc -endif +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc endif # VP9 diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index c3fa07284..4ae8132bb 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -16,15 +16,11 @@ const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25] = { {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, -#if CONFIG_TX32X32 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} -#endif }; const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25] = { {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 
4, 5, 4, 5, 6, 7, 6, 7, 8}, {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}, -#if CONFIG_TX32X32 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8} -#endif }; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 5de6d2ecc..b5f9bd2c4 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -125,12 +125,8 @@ typedef enum { TX_8X8 = 1, // 8x8 dct transform TX_16X16 = 2, // 16x16 dct transform TX_SIZE_MAX_MB = 3, // Number of different transforms available -#if CONFIG_TX32X32 TX_32X32 = TX_SIZE_MAX_MB, // 32x32 dct transform TX_SIZE_MAX_SB, // Number of transforms available to SBs -#else - TX_SIZE_MAX_SB = TX_SIZE_MAX_MB, -#endif } TX_SIZE; typedef enum { @@ -301,14 +297,12 @@ typedef struct blockd { union b_mode_info bmi; } BLOCKD; -#if CONFIG_TX32X32 typedef struct superblockd { /* 32x32 Y and 16x16 U/V. No 2nd order transform yet. */ DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]); DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]); DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]); } SUPERBLOCKD; -#endif typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, diff[400]); /* from idct diff */ @@ -317,9 +311,7 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, dqcoeff[400]); DECLARE_ALIGNED(16, uint16_t, eobs[25]); -#if CONFIG_TX32X32 SUPERBLOCKD sb_coeff_data; -#endif /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ BLOCKD block[25]; diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 0c9131db2..10d3c389f 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -1038,7 +1038,6 @@ static const vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] } } }; -#if CONFIG_TX32X32 static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { { /* block Type 0 */ { /* Coeff Band 0 */ @@ -1210,4 +1209,3 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { } } }; -#endif // CONFIG_TX32X32 diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index bc8738411..2200cc3f9 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -142,7 +142,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { 237, 252, 253, 238, 223, 239, 254, 255, }; -#if CONFIG_TX32X32 #if CONFIG_DWTDCTHYBRID DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, @@ -352,7 +351,7 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 975, 991, 510, 1006, 1022, 511, 1007, 1023, }; -#else +#else // CONFIG_DWTDCTHYBRID DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, @@ -459,7 +458,6 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023, }; #endif // CONFIG_DWTDCTHYBRID -#endif /* Array indices are identical to previously-existing CONTEXT_NODE indices */ @@ -547,10 +545,8 @@ DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]); -#if CONFIG_TX32X32 DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]); -#endif 
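These *_neighbors tables cache, for each coefficient in scan order, the scan positions of its already-coded spatial neighbors, turning the entropy coder's context derivation into a table lookup; find_in_scan and init_scan_neighbors below populate them. A sketch of the inversion involved, assuming exactly two neighbors (left and above) per position; the edge fallback here is simplified relative to the library's actual rule:

#include <stdint.h>

#define SIDE_MAX 32
#define NBRS 2  /* assumed neighbor count for this sketch */

/* scan[i] is the raster position of the i-th coefficient coded. */
static void build_neighbors(const int *scan, int side,
                            int *neighbors /* side*side*NBRS entries */) {
  int pos2scan[SIDE_MAX * SIDE_MAX];
  int i;
  for (i = 0; i < side * side; ++i)
    pos2scan[scan[i]] = i;                   /* invert the scan order */
  for (i = 0; i < side * side; ++i) {
    const int x = scan[i] % side, y = scan[i] / side;
    neighbors[NBRS * i + 0] = x > 0 ? pos2scan[scan[i] - 1] : 0;
    neighbors[NBRS * i + 1] = y > 0 ? pos2scan[scan[i] - side] : 0;
  }
}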
static int find_in_scan(const int *scan, int l, int m) { int i, l2 = l * l; @@ -628,10 +624,8 @@ void vp9_init_neighbors() { vp9_default_zig_zag1d_8x8_neighbors); init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16, vp9_default_zig_zag1d_16x16_neighbors); -#if CONFIG_TX32X32 init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32, vp9_default_zig_zag1d_32x32_neighbors); -#endif } const int *vp9_get_coef_neighbors_handle(const int *scan) { @@ -645,10 +639,8 @@ const int *vp9_get_coef_neighbors_handle(const int *scan) { return vp9_default_zig_zag1d_8x8_neighbors; } else if (scan == vp9_default_zig_zag1d_16x16) { return vp9_default_zig_zag1d_16x16_neighbors; -#if CONFIG_TX32X32 } else if (scan == vp9_default_zig_zag1d_32x32) { return vp9_default_zig_zag1d_32x32_neighbors; -#endif } return vp9_default_zig_zag1d_4x4_neighbors; } @@ -693,10 +685,8 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.hybrid_coef_probs_16x16, default_hybrid_coef_probs_16x16, sizeof(pc->fc.hybrid_coef_probs_16x16)); -#if CONFIG_TX32X32 vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32, sizeof(pc->fc.coef_probs_32x32)); -#endif } void vp9_coef_tree_initialize() { @@ -840,9 +830,7 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { cm->fc.pre_hybrid_coef_probs_16x16, BLOCK_TYPES_16X16, cm->fc.hybrid_coef_counts_16x16, count_sat, update_factor); -#if CONFIG_TX32X32 update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32, count_sat, update_factor); -#endif } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index cc65ffc0a..84e5255c2 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -66,9 +66,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ #define BLOCK_TYPES_16X16 4 -#if CONFIG_TX32X32 #define BLOCK_TYPES_32X32 4 -#endif /* Middle dimension is a coarsening of the coefficient's position within the 4x4 DCT. */ @@ -77,9 +75,7 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]); -#if CONFIG_TX32X32 extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]); -#endif /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. 
For the first @@ -122,9 +118,7 @@ extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]); extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]); extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]); -#if CONFIG_TX32X32 extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); -#endif void vp9_coef_tree_initialize(void); void vp9_adapt_coef_probs(struct VP9Common *); @@ -154,9 +148,7 @@ extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[ 64 * MAX_NEIGHBORS]); extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[ 256 * MAX_NEIGHBORS]); -#if CONFIG_TX32X32 extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[ 1024 * MAX_NEIGHBORS]); -#endif #endif // CONFIG_NEWCOEFCONTEXT #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index baa22457f..0e6a6447c 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -1533,7 +1533,6 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { #undef RIGHT_ROUNDING #endif -#if CONFIG_TX32X32 #if !CONFIG_DWTDCTHYBRID #define DownshiftMultiplyBy2(x) x * 2 #define DownshiftMultiply(x) x @@ -2505,4 +2504,3 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { #endif } #endif // CONFIG_DWTDCTHYBRID -#endif // CONFIG_TX32X32 diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index a09c0a80a..b5e6e3cc2 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -146,7 +146,6 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { vp9_inverse_transform_mbuv_8x8(xd); } -#if CONFIG_TX32X32 void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) { vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64); } @@ -157,4 +156,3 @@ void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) { vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280, xd_sb->diff + 1280, 32); } -#endif diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index 1f0164bb1..fd0eb3020 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -39,9 +39,7 @@ extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); -#if CONFIG_TX32X32 extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); -#endif #endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 8c13aaf56..0badd276c 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -187,9 +187,7 @@ static int sb_mb_lf_skip(const MODE_INFO *const mip0, const MODE_INFO *const mip1) { return mb_lf_skip(&mip0->mbmi) && mb_lf_skip(&mip1->mbmi) && -#if CONFIG_TX32X32 mip0->mbmi.txfm_size >= TX_32X32 && -#endif mip0->mbmi.ref_frame; } void vp9_loop_filter_frame(VP9_COMMON *cm, diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 1dbe7f9f8..ac66e4902 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -55,9 +55,7 @@ typedef struct frame_contexts { vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; -#endif nmv_context nmvc; nmv_context pre_nmvc; @@ -82,9 +80,7 @@ typedef struct frame_contexts { vp9_coeff_probs pre_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; 
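The pre_* tables in this struct snapshot the probabilities that were in force while the frame was coded, and the *_counts tables that follow record what the frame actually contained; vp9_adapt_coef_probs then moves each probability toward the observed frequency, with the count saturated and the step scaled by an update factor. Per binary node the blend looks roughly like this (a sketch under those assumptions, not the library's exact helpers):

#include <stdint.h>

typedef uint8_t vp9_prob;

static vp9_prob adapt_prob(vp9_prob pre, unsigned int ct0, unsigned int ct1,
                           unsigned int count_sat,     /* e.g. 24 */
                           unsigned int max_update) {  /* <= 256, e.g. 112 */
  const unsigned int den = ct0 + ct1;
  const unsigned int count = den < count_sat ? den : count_sat;
  const unsigned int factor = max_update * count / count_sat;
  unsigned int obs = den ? (255 * ct0 + den / 2) / den : 128;
  if (obs < 1) obs = 1;       /* keep the result a valid probability */
  if (obs > 255) obs = 255;
  /* move factor/256 of the way from the old prob to the observed one */
  return (vp9_prob)((pre * (256 - factor) + obs * factor + 128) >> 8);
}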
vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_probs pre_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32]; -#endif vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4]; vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]; @@ -92,9 +88,7 @@ typedef struct frame_contexts { vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16]; vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; -#endif nmv_context_counts NMVcount; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] @@ -125,11 +119,9 @@ typedef enum { ONLY_4X4 = 0, ALLOW_8X8 = 1, ALLOW_16X16 = 2, -#if CONFIG_TX32X32 ALLOW_32X32 = 3, -#endif - TX_MODE_SELECT = 3 + CONFIG_TX32X32, - NB_TXFM_MODES = 4 + CONFIG_TX32X32, + TX_MODE_SELECT = 4, + NB_TXFM_MODES = 5, } TXFM_MODE; typedef struct VP9Common { diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index 5c3f565a5..caf7b8d22 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -115,7 +115,6 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { } } -#if CONFIG_TX32X32 void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { int x, y, stride = xd->block[0].dst_stride; int16_t *diff = xd->sb_coeff_data.diff; @@ -145,7 +144,6 @@ void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { vdiff += 16; } } -#endif // CONFIG_TX32X32 void vp9_recon_mby_c(MACROBLOCKD *xd) { int i; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 9d1f51568..004d4f5b9 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -77,16 +77,12 @@ specialize vp9_dequant_idct_add_y_block prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_uv_block -if [ "$CONFIG_TX32X32" = "yes" ]; then - prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_32x32 prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_uv_block_16x16 -fi - # # RECON # @@ -135,16 +131,12 @@ specialize vp9_recon_mby_s prototype void vp9_recon_mbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" specialize void vp9_recon_mbuv_s -if [ "$CONFIG_TX32X32" = "yes" ]; then - prototype void vp9_recon_sby_s "struct macroblockd *x, uint8_t *dst" specialize vp9_recon_sby_s prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" specialize void vp9_recon_sbuv_s -fi - prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x" specialize vp9_build_intra_predictors_mby_s diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 4b633df45..8b39a1eb1 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -222,15 +222,11 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]); if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) { m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]); -#if CONFIG_TX32X32 if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type) m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]); -#endif } -#if CONFIG_TX32X32 } else if (cm->txfm_mode >= 
ALLOW_32X32 && m->mbmi.sb_type) { m->mbmi.txfm_size = TX_32X32; -#endif } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { m->mbmi.txfm_size = TX_16X16; } else if (cm->txfm_mode >= ALLOW_8X8 && m->mbmi.mode != B_PRED) { @@ -1208,15 +1204,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (mbmi->txfm_size != TX_4X4 && mbmi->mode != I8X8_PRED && mbmi->mode != SPLITMV) { mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]); -#if CONFIG_TX32X32 if (mbmi->sb_type && mbmi->txfm_size != TX_8X8) mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]); -#endif } -#if CONFIG_TX32X32 } else if (mbmi->sb_type && cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; -#endif } else if (cm->txfm_mode >= ALLOW_16X16 && ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) || (mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) { diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 8d33dbf78..685491ff5 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -747,7 +747,6 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ -#if CONFIG_TX32X32 if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; @@ -787,7 +786,6 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } } } else { -#endif for (n = 0; n < 16; n++) { int x_idx = n & 3, y_idx = n >> 2; @@ -816,9 +814,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, decode_4x4_sb(pbi, xd, bc, n, 3, 2); } } -#if CONFIG_TX32X32 } -#endif xd->above_context = pc->above_context + mb_col; xd->left_context = pc->left_context; @@ -873,7 +869,6 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ -#if CONFIG_TX32X32 if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); if (eobtotal == 0) { // skip loopfilter @@ -895,9 +890,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } - } else -#endif - { + } else { for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; @@ -1396,11 +1389,9 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16); } -#if CONFIG_TX32X32 if (pbi->common.txfm_mode > ALLOW_16X16) { read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32); } -#endif } int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { @@ -1590,16 +1581,12 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { /* Read the loop filter level and type */ pc->txfm_mode = vp9_read_literal(&header_bc, 2); -#if CONFIG_TX32X32 if (pc->txfm_mode == 3) pc->txfm_mode += vp9_read_bit(&header_bc); -#endif if (pc->txfm_mode == TX_MODE_SELECT) { pc->prob_tx[0] = vp9_read_literal(&header_bc, 8); pc->prob_tx[1] = vp9_read_literal(&header_bc, 8); -#if CONFIG_TX32X32 pc->prob_tx[2] = vp9_read_literal(&header_bc, 8); -#endif } pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc); @@ -1782,10 +1769,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pbi->common.fc.coef_probs_16x16); vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16, pbi->common.fc.hybrid_coef_probs_16x16); -#if CONFIG_TX32X32 vp9_copy(pbi->common.fc.pre_coef_probs_32x32, pbi->common.fc.coef_probs_32x32); -#endif 
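In decode_superblock64's TX_32X32 path above, the four 32x32 sub-blocks of a 64x64 superblock are visited with x_idx = n & 1 and y_idx = n >> 1, and each dequant/idct-add lands at pixel offset (32 * x_idx, 32 * y_idx) in the destination. The indexing in isolation, with an assumed stride purely for illustration:

#include <stdio.h>

int main(void) {
  const int stride = 128;  /* assumed luma stride, illustration only */
  int n;
  for (n = 0; n < 4; ++n) {
    const int x_idx = n & 1, y_idx = n >> 1;
    printf("subblock %d -> byte offset %d\n",
           n, y_idx * 32 * stride + x_idx * 32);
  }
  return 0;
}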
vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob); vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob); vp9_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob); @@ -1803,9 +1788,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8); vp9_zero(pbi->common.fc.coef_counts_16x16); vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16); -#if CONFIG_TX32X32 vp9_zero(pbi->common.fc.coef_counts_32x32); -#endif vp9_zero(pbi->common.fc.ymode_counts); vp9_zero(pbi->common.fc.sb_ymode_counts); vp9_zero(pbi->common.fc.uv_mode_counts); diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 46d4d01da..354d2bd36 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -348,7 +348,6 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq, } } -#if CONFIG_TX32X32 void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob) { @@ -373,4 +372,3 @@ void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, eobs[4]); } -#endif diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 1286f9cca..335c335ca 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -144,12 +144,10 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, coef_counts = fc->hybrid_coef_counts_16x16; } break; -#if CONFIG_TX32X32 case TX_32X32: coef_probs = fc->coef_probs_32x32; coef_counts = fc->coef_counts_32x32; break; -#endif } VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l); @@ -249,7 +247,6 @@ static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) { return eob; } -#if CONFIG_TX32X32 int vp9_decode_sb_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { @@ -316,7 +313,6 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, A[8] = L[8] = A1[8] = L1[8] = 0; return eobtotal; } -#endif static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, MACROBLOCKD* const xd, diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index eb9bc53bc..926a0661f 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -23,11 +23,9 @@ int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, int vp9_decode_mb_tokens(VP9D_COMP* const, MACROBLOCKD* const, BOOL_DECODER* const); -#if CONFIG_TX32X32 int vp9_decode_sb_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc); -#endif int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index ba06bffbf..d8659cbf8 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -972,10 +972,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) { vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); -#if CONFIG_TX32X32 if (mi->sb_type && sz != TX_8X8) vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); -#endif } } } @@ -1078,10 +1076,8 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); if (sz != TX_4X4 && ym <= TM_PRED) { vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); -#if CONFIG_TX32X32 if (m->mbmi.sb_type && sz != TX_8X8) vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); 
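The writer above and the reader in vp9_decodemv.c walk the same three-decision tree under TX_MODE_SELECT: one bit against prob_tx[0] separates TX_4X4 from larger, one against prob_tx[1] separates TX_8X8 from larger, and the prob_tx[2] decision exists only for superblocks, the sole blocks allowed to reach TX_32X32. A decoder-side sketch mirroring those hunks (mode restrictions such as I8X8_PRED and SPLITMV omitted; read_bit stands in for vp9_read):

#include <stdint.h>

typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 } TX_SIZE;

static TX_SIZE read_tx_size(int (*read_bit)(void *ctx, uint8_t prob),
                            void *ctx, const uint8_t prob_tx[3],
                            int is_superblock) {
  TX_SIZE sz = TX_4X4;
  if (read_bit(ctx, prob_tx[0])) {
    sz = TX_8X8;
    if (read_bit(ctx, prob_tx[1])) {
      sz = TX_16X16;
      if (is_superblock && read_bit(ctx, prob_tx[2]))
        sz = TX_32X32;
    }
  }
  return sz;
}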
-#endif } } } @@ -1262,14 +1258,12 @@ static void build_coeff_contexts(VP9_COMP *cpi) { cpi, hybrid_context_counters_16x16, #endif cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16); -#if CONFIG_TX32X32 build_tree_distribution(cpi->frame_coef_probs_32x32, cpi->coef_counts_32x32, #ifdef ENTROPY_STATS cpi, context_counters_32x32, #endif cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32); -#endif } static void update_coef_probs_common(vp9_writer* const bc, @@ -1446,7 +1440,6 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { BLOCK_TYPES_16X16); } -#if CONFIG_TX32X32 if (cpi->common.txfm_mode > ALLOW_16X16) { update_coef_probs_common(bc, #ifdef ENTROPY_STATS @@ -1458,7 +1451,6 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32); } -#endif } #ifdef PACKET_TESTING @@ -1699,9 +1691,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, cpi->txfm_count_32x32p[TX_4X4] + cpi->txfm_count_32x32p[TX_8X8] + cpi->txfm_count_32x32p[TX_16X16] + -#if CONFIG_TX32X32 cpi->txfm_count_32x32p[TX_32X32] + -#endif cpi->txfm_count_16x16p[TX_4X4] + cpi->txfm_count_16x16p[TX_8X8] + cpi->txfm_count_16x16p[TX_16X16] + @@ -1711,35 +1701,25 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, cpi->txfm_count_16x16p[TX_8X8], cpi->txfm_count_32x32p[TX_8X8] + cpi->txfm_count_32x32p[TX_16X16] + -#if CONFIG_TX32X32 cpi->txfm_count_32x32p[TX_32X32] + -#endif cpi->txfm_count_16x16p[TX_8X8] + cpi->txfm_count_16x16p[TX_16X16]); -#if CONFIG_TX32X32 pc->prob_tx[2] = get_prob(cpi->txfm_count_32x32p[TX_16X16], cpi->txfm_count_32x32p[TX_16X16] + cpi->txfm_count_32x32p[TX_32X32]); -#endif } else { pc->prob_tx[0] = 128; pc->prob_tx[1] = 128; -#if CONFIG_TX32X32 pc->prob_tx[2] = 128; -#endif } vp9_write_literal(&header_bc, pc->txfm_mode <= 3 ? 
pc->txfm_mode : 3, 2); -#if CONFIG_TX32X32 if (pc->txfm_mode > ALLOW_16X16) { vp9_write_bit(&header_bc, pc->txfm_mode == TX_MODE_SELECT); } -#endif if (pc->txfm_mode == TX_MODE_SELECT) { vp9_write_literal(&header_bc, pc->prob_tx[0], 8); vp9_write_literal(&header_bc, pc->prob_tx[1], 8); -#if CONFIG_TX32X32 vp9_write_literal(&header_bc, pc->prob_tx[2], 8); -#endif } } @@ -1960,10 +1940,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, cpi->common.fc.coef_probs_16x16); vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16, cpi->common.fc.hybrid_coef_probs_16x16); -#if CONFIG_TX32X32 vp9_copy(cpi->common.fc.pre_coef_probs_32x32, cpi->common.fc.coef_probs_32x32); -#endif vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob); vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob); vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); @@ -2125,10 +2103,8 @@ void print_tree_update_probs() { print_tree_update_for_type(f, hybrid_tree_update_hist_16x16, BLOCK_TYPES_16X16, "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); -#if CONFIG_TX32X32 print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32, "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]"); -#endif fclose(f); f = fopen("treeupdate.bin", "wb"); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index f856daddf..1960b9162 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -35,15 +35,11 @@ typedef struct block { int16_t *zbin; int16_t *zbin_8x8; int16_t *zbin_16x16; -#if CONFIG_TX32X32 int16_t *zbin_32x32; -#endif int16_t *zrun_zbin_boost; int16_t *zrun_zbin_boost_8x8; int16_t *zrun_zbin_boost_16x16; -#if CONFIG_TX32X32 int16_t *zrun_zbin_boost_32x32; -#endif int16_t *round; // Zbin Over Quant value @@ -57,9 +53,7 @@ typedef struct block { int eob_max_offset; int eob_max_offset_8x8; int eob_max_offset_16x16; -#if CONFIG_TX32X32 int eob_max_offset_32x32; -#endif } BLOCK; typedef struct { @@ -92,12 +86,10 @@ typedef struct { int64_t txfm_rd_diff[NB_TXFM_MODES]; } PICK_MODE_CONTEXT; -#if CONFIG_TX32X32 typedef struct superblock { DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]); DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]); } SUPERBLOCK; -#endif typedef struct macroblock { DECLARE_ALIGNED(16, int16_t, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y @@ -106,9 +98,7 @@ typedef struct macroblock { // 1 DC 2nd order block each with 16 entries BLOCK block[25]; -#if CONFIG_TX32X32 SUPERBLOCK sb_coeff_data; -#endif YV12_BUFFER_CONFIG src; diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 0de6393a0..46d8ca315 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -1332,9 +1332,7 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { #undef ROUNDING #endif -#if CONFIG_TX32X32 || CONFIG_TX64X64 #if !CONFIG_DWTDCTHYBRID -#if CONFIG_TX32X32 static void dct32_1d(double *input, double *output, int stride) { static const double C1 = 0.998795456205; // cos(pi * 1 / 64) static const double C2 = 0.995184726672; // cos(pi * 2 / 64) @@ -1685,7 +1683,6 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { vp9_clear_system_state(); // Make it simd safe : __asm emms; } -#endif // CONFIG_TX32X32 #else // CONFIG_DWTDCTHYBRID @@ -2142,7 +2139,6 @@ static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, vp9_clear_system_state(); // Make it simd safe : __asm emms; } -#if CONFIG_TX32X32 void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { // assume out is a 32x32 
buffer short buffer[16 * 16]; @@ -2173,7 +2169,6 @@ void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16); } -#endif // CONFIG_TX32X32 #if CONFIG_TX64X64 void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { @@ -2235,4 +2230,3 @@ void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { } #endif // CONFIG_TX64X64 #endif // CONFIG_DWTDCTHYBRID -#endif // CONFIG_TX32X32 || CONFIG_TX64X64 diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 0abf22cff..3ae8bd6d6 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -461,11 +461,9 @@ static void update_state(VP9_COMP *cpi, } } } -#if CONFIG_TX32X32 if (block_size == 16) { ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; } -#endif if (mb_mode == B_PRED) { for (i = 0; i < 16; i++) { @@ -1299,9 +1297,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(cpi->hybrid_coef_counts_8x8); vp9_zero(cpi->coef_counts_16x16); vp9_zero(cpi->hybrid_coef_counts_16x16); -#if CONFIG_TX32X32 vp9_zero(cpi->coef_counts_32x32); -#endif #if CONFIG_NEW_MVREF vp9_zero(cpi->mb_mv_ref_count); #endif @@ -1570,11 +1566,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { * keyframe's probabilities as an estimate of what the current keyframe's * coefficient cost distributions may look like. */ if (frame_type == 0) { -#if CONFIG_TX32X32 txfm_type = ALLOW_32X32; -#else - txfm_type = ALLOW_16X16; -#endif } else #if 0 /* FIXME (rbultje) @@ -1605,15 +1597,9 @@ void vp9_encode_frame(VP9_COMP *cpi) { } else txfm_type = ALLOW_8X8; #else -#if CONFIG_TX32X32 txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >= - cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? - ALLOW_32X32 : TX_MODE_SELECT; -#else - txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_16X16] >= - cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? - ALLOW_16X16 : TX_MODE_SELECT; -#endif + cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? 
+ ALLOW_32X32 : TX_MODE_SELECT; #endif cpi->common.txfm_mode = txfm_type; if (txfm_type != TX_MODE_SELECT) { @@ -1665,11 +1651,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { const int count8x8_8x8p = cpi->txfm_count_8x8p[TX_8X8]; const int count16x16_16x16p = cpi->txfm_count_16x16p[TX_16X16]; const int count16x16_lp = cpi->txfm_count_32x32p[TX_16X16]; -#if CONFIG_TX32X32 const int count32x32 = cpi->txfm_count_32x32p[TX_32X32]; -#else - const int count32x32 = 0; -#endif if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && count32x32 == 0) { @@ -1679,15 +1661,11 @@ void vp9_encode_frame(VP9_COMP *cpi) { count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { cpi->common.txfm_mode = ONLY_4X4; reset_skip_txfm_size(cpi, TX_4X4); -#if CONFIG_TX32X32 } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { cpi->common.txfm_mode = ALLOW_32X32; -#endif } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { cpi->common.txfm_mode = ALLOW_16X16; -#if CONFIG_TX32X32 reset_skip_txfm_size(cpi, TX_16X16); -#endif } } @@ -1913,7 +1891,6 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi, int skip[16], int output_enabled) { MACROBLOCK *const x = &cpi->mb; -#if CONFIG_TX32X32 if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) { TOKENEXTRA tokens[4][1024+512]; int n_tokens[4], n; @@ -1961,9 +1938,7 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi, (*tp) += n_tokens[n]; } } - } else -#endif // CONFIG_TX32X32 - { + } else { TOKENEXTRA tokens[16][16 * 25]; int n_tokens[16], n; @@ -2388,7 +2363,6 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, xd->dst.y_stride, xd->dst.uv_stride); } -#if CONFIG_TX32X32 if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { if (!x->skip) { vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, @@ -2435,9 +2409,7 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff; - } else -#endif - { + } else { for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; @@ -2502,11 +2474,7 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
-#if CONFIG_TX32X32 TX_32X32 : -#else - TX_16X16 : -#endif cm->txfm_mode; mi->mbmi.txfm_size = sz; if (mb_col < cm->mb_cols - 1) @@ -2634,7 +2602,6 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, xd->dst.y_stride, xd->dst.uv_stride); } -#if CONFIG_TX32X32 if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { int n; @@ -2705,9 +2672,7 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, } skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; } - } else -#endif - { + } else { for (n = 0; n < 16; n++) { const int x_idx = n & 3, y_idx = n >> 2; @@ -2766,15 +2731,9 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && !((cm->mb_no_coeff_skip && - ( -#if CONFIG_TX32X32 - (mi->mbmi.txfm_size == TX_32X32 && + ((mi->mbmi.txfm_size == TX_32X32 && skip[0] && skip[1] && skip[2] && skip[3]) || -#endif // CONFIG_TX32X32 - ( -#if CONFIG_TX32X32 - mi->mbmi.txfm_size != TX_32X32 && -#endif // CONFIG_TX32X32 + (mi->mbmi.txfm_size != TX_32X32 && skip[0] && skip[1] && skip[2] && skip[3] && skip[4] && skip[5] && skip[6] && skip[7] && skip[8] && skip[9] && skip[10] && skip[11] && @@ -2785,11 +2744,7 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, } else { int x, y; TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? -#if CONFIG_TX32X32 TX_32X32 : -#else - TX_16X16 : -#endif cm->txfm_mode; for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index a6b41fffb..45278a71b 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -108,7 +108,6 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride, } } -#if CONFIG_TX32X32 void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, const uint8_t *pred, int dst_stride) { int r, c; @@ -152,7 +151,6 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, vsrc += src_stride; } } -#endif void vp9_subtract_mby_c(int16_t *diff, uint8_t *src, uint8_t *pred, int stride) { @@ -311,7 +309,6 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) { vp9_transform_mbuv_8x8(x); } -#if CONFIG_TX32X32 void vp9_transform_sby_32x32(MACROBLOCK *x) { SUPERBLOCK * const x_sb = &x->sb_coeff_data; vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64); @@ -325,7 +322,6 @@ void vp9_transform_sbuv_16x16(MACROBLOCK *x) { x->vp9_short_fdct16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32); } -#endif #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) #define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index b8bf7de0d..f3c679227 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -43,10 +43,8 @@ void vp9_transform_mb_16x16(MACROBLOCK *mb); void vp9_transform_mby_16x16(MACROBLOCK *x); void vp9_optimize_mby_16x16(MACROBLOCK *x); -#if CONFIG_TX32X32 void vp9_transform_sby_32x32(MACROBLOCK *x); void vp9_transform_sbuv_16x16(MACROBLOCK *x); -#endif void vp9_fidct_mb(MACROBLOCK *x); @@ -59,13 +57,11 @@ void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc, void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride, const uint8_t *pred, int dst_stride); -#if CONFIG_TX32X32 void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, const uint8_t *pred, int dst_stride); void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, const uint8_t *vsrc, int src_stride, const uint8_t *upred, const uint8_t *vpred, int 
dst_stride); -#endif // CONFIG_TX32X32 #endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 6a4c62496..90f350004 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -3461,9 +3461,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16, cpi->hybrid_coef_counts_16x16); -#if CONFIG_TX32X32 vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32); -#endif vp9_adapt_coef_probs(&cpi->common); if (cpi->common.frame_type != KEY_FRAME) { vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 403335047..74a58b430 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -92,9 +92,7 @@ typedef struct { vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; -#endif vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ @@ -331,14 +329,12 @@ typedef struct VP9_COMP { DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]); -#if CONFIG_TX32X32 DECLARE_ALIGNED(16, short, Y1zbin_32x32[QINDEX_RANGE][1024]); DECLARE_ALIGNED(16, short, Y2zbin_32x32[QINDEX_RANGE][1024]); DECLARE_ALIGNED(16, short, UVzbin_32x32[QINDEX_RANGE][1024]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_32x32[QINDEX_RANGE][1024]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_32x32[QINDEX_RANGE][1024]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_32x32[QINDEX_RANGE][1024]); -#endif MACROBLOCK mb; VP9_COMMON common; @@ -509,11 +505,9 @@ typedef struct VP9_COMP { vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]; vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32]; vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32]; -#endif int gfu_boost; int last_boost; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 98396a1a7..36b656713 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -379,7 +379,6 @@ void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) { &d->eob, vp9_default_zig_zag1d_16x16, 1); } -#if CONFIG_TX32X32 void vp9_quantize_sby_32x32(MACROBLOCK *x) { x->e_mbd.block[0].eob = 0; quantize(x->block[0].zrun_zbin_boost_32x32, @@ -413,7 +412,6 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { &x->e_mbd.block[i].eob, vp9_default_zig_zag1d_16x16, 1); } -#endif /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of * these two C functions if corresponding optimized routine is not available. 
@@ -472,7 +470,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, }; -#if CONFIG_TX32X32 static const int zbin_boost_32x32[1024] = { 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48, @@ -539,7 +536,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, }; -#endif int qrounding_factor = 48; @@ -569,11 +565,9 @@ void vp9_init_quantizer(VP9_COMP *cpi) { ((quant_val * zbin_boost_8x8[0]) + 64) >> 7; cpi->zrun_zbin_boost_y1_16x16[Q][0] = ((quant_val * zbin_boost_16x16[0]) + 64) >> 7; -#if CONFIG_TX32X32 cpi->Y1zbin_32x32[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7; cpi->zrun_zbin_boost_y1_32x32[Q][0] = ((quant_val * zbin_boost_32x32[0]) + 64) >> 7; -#endif quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q); @@ -677,7 +671,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->zrun_zbin_boost_uv_16x16[Q][i] = ((quant_val * zbin_boost_16x16[i]) + 64) >> 7; } -#if CONFIG_TX32X32 // 32x32 structures. Same comment above applies. for (i = 1; i < 1024; i++) { int rc = vp9_default_zig_zag1d_32x32[i]; @@ -687,7 +680,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->zrun_zbin_boost_y1_32x32[Q][i] = ((quant_val * zbin_boost_32x32[i]) + 64) >> 7; } -#endif } } @@ -727,17 +719,13 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex]; x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex]; -#if CONFIG_TX32X32 x->block[i].zbin_32x32 = cpi->Y1zbin_32x32[QIndex]; -#endif x->block[i].round = cpi->Y1round[QIndex]; x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex]; x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex]; -#if CONFIG_TX32X32 x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex]; -#endif x->block[i].zbin_extra = (int16_t)zbin_extra; // Segment max eob offset feature. 
@@ -748,17 +736,13 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); x->block[i].eob_max_offset_16x16 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); -#if CONFIG_TX32X32 x->block[i].eob_max_offset_32x32 = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB); -#endif } else { x->block[i].eob_max_offset = 16; x->block[i].eob_max_offset_8x8 = 64; x->block[i].eob_max_offset_16x16 = 256; -#if CONFIG_TX32X32 x->block[i].eob_max_offset_32x32 = 1024; -#endif } } diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 4ca3081db..ac44a751c 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -78,10 +78,8 @@ void vp9_quantize_mb_16x16(MACROBLOCK *x); extern prototype_quantize_block(vp9_quantize_quantb_16x16); extern prototype_quantize_mb(vp9_quantize_mby_16x16); -#if CONFIG_TX32X32 void vp9_quantize_sby_32x32(MACROBLOCK *x); void vp9_quantize_sbuv_16x16(MACROBLOCK *x); -#endif struct VP9_COMP; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 353ab979f..f663b56c9 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -173,9 +173,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8); vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16); vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16); -#if CONFIG_TX32X32 vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32); -#endif vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); #if CONFIG_COMP_INTERINTRA_PRED cc->interintra_prob = cm->fc.interintra_prob; @@ -233,9 +231,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8); vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16); vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16); -#if CONFIG_TX32X32 vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32); -#endif vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); #if CONFIG_COMP_INTERINTRA_PRED cm->fc.interintra_prob = cc->interintra_prob; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a8ece2c58..7d2c489d5 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -293,10 +293,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16], cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16); -#if CONFIG_TX32X32 fill_token_costs(cpi->mb.token_costs[TX_32X32], cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32); -#endif /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; @@ -435,9 +433,7 @@ static int cost_coeffs(MACROBLOCK *mb, int pt; const int eob = b->eob; MACROBLOCKD *xd = &mb->e_mbd; -#if CONFIG_TX32X32 const int ib = (int)(b - xd->block); -#endif int c = (type == PLANE_TYPE_Y_NO_DC) ? 
1 : 0; int cost = 0, seg_eob; const int segment_id = xd->mode_info_context->mbmi.segment_id; @@ -480,21 +476,17 @@ static int cost_coeffs(MACROBLOCK *mb, scan = vp9_default_zig_zag1d_16x16; band = vp9_coef_bands_16x16; seg_eob = 256; -#if CONFIG_TX32X32 if (type == PLANE_TYPE_UV) { const int uv_idx = ib - 16; qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx; } -#endif break; -#if CONFIG_TX32X32 case TX_32X32: scan = vp9_default_zig_zag1d_32x32; band = vp9_coef_bands_32x32; seg_eob = 1024; qcoeff_ptr = xd->sb_coeff_data.qcoeff; break; -#endif default: abort(); break; @@ -761,21 +753,17 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } } -#if CONFIG_TX32X32 if (max_txfm_size == TX_32X32 && (cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; - } else -#endif - if ( cm->txfm_mode == ALLOW_16X16 || -#if CONFIG_TX32X32 - (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) || -#endif - (cm->txfm_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])) { + } else if ( cm->txfm_mode == ALLOW_16X16 || + (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) || + (cm->txfm_mode == TX_MODE_SELECT && + rd[TX_16X16][1] < rd[TX_8X8][1] && + rd[TX_16X16][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode == ALLOW_8X8 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { @@ -792,15 +780,12 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; txfm_cache[ALLOW_16X16] = rd[TX_16X16][0]; -#if CONFIG_TX32X32 txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0]; if (max_txfm_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else -#endif - if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) + else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; else txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? 
@@ -833,7 +818,6 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { d[12] = p[12]; } -#if CONFIG_TX32X32 static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { MACROBLOCKD * const xd = &x->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; @@ -895,7 +879,6 @@ static void super_block_yrd_32x32(MACROBLOCK *x, *rate = rdcost_sby_32x32(x, backup); *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd); } -#endif static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, @@ -918,11 +901,9 @@ static void super_block_yrd(VP9_COMP *cpi, s[n] = 1; } -#if CONFIG_TX32X32 vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, dst, dst_y_stride); super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1); -#endif #if DEBUG_ERROR int err[3] = { 0, 0, 0 }; @@ -1003,7 +984,6 @@ static void super_block_64_yrd(VP9_COMP *cpi, s[n] = 1; } -#if CONFIG_TX32X32 for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; int r_tmp, d_tmp, s_tmp; @@ -1020,7 +1000,6 @@ static void super_block_64_yrd(VP9_COMP *cpi, d[TX_32X32] += d_tmp; s[TX_32X32] = s[TX_32X32] && s_tmp; } -#endif #if DEBUG_ERROR int err[3] = { 0, 0, 0 }; @@ -1784,7 +1763,6 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -#if CONFIG_TX32X32 static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { int b; int cost = 0; @@ -1824,7 +1802,6 @@ static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate, xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2; *skip = vp9_sbuv_is_skippable_16x16(xd); } -#endif static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel, int *skip) { @@ -1834,15 +1811,12 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; -#if CONFIG_TX32X32 if (mbmi->txfm_size == TX_32X32) { vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1); - } else -#endif - { + } else { int n, r = 0, d = 0; int skippable = 1; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; @@ -2040,15 +2014,12 @@ static void super_block_uvrd(MACROBLOCK *x, const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; -#if CONFIG_TX32X32 if (mbmi->txfm_size == TX_32X32) { vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1); - } else -#endif - { + } else { int d = 0, r = 0, n, s = 1; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context; @@ -2113,7 +2084,6 @@ static void super_block_64_uvrd(MACROBLOCK *x, memcpy(t_above, xd->above_context, sizeof(t_above)); memcpy(t_left, xd->left_context, sizeof(t_left)); -#if CONFIG_TX32X32 if (mbmi->txfm_size == TX_32X32) { int n; @@ -2136,9 +2106,7 @@ static void super_block_64_uvrd(MACROBLOCK *x, d += d_tmp; s = s && s_tmp; } - } else -#endif - { + } else { for (n = 0; n < 16; n++) { int x_idx = n & 3, y_idx = n >> 2; @@ -4749,11 +4717,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0; MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV; int 
switchable_filter_index = 0; -#if CONFIG_TX32X32 int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0; int dist_uv_16x16 = 0, uv_skip_16x16 = 0; MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV; -#endif x->skip = 0; xd->mode_info_context->mbmi.segment_id = segment_id; @@ -4790,7 +4756,6 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &dist_uv_8x8, &uv_skip_8x8); mode_uv_8x8 = mbmi->uv_mode; } -#if CONFIG_TX32X32 if (cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_16x16, @@ -4798,7 +4763,6 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &dist_uv_16x16, &uv_skip_16x16); mode_uv_16x16 = mbmi->uv_mode; } -#endif // CONFIG_TX32X32 } else { assert(block_size == BLOCK_32X32); mbmi->mode = DC_PRED; @@ -4814,14 +4778,12 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &dist_uv_8x8, &uv_skip_8x8); mode_uv_8x8 = mbmi->uv_mode; } -#if CONFIG_TX32X32 if (cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16, &dist_uv_16x16, &uv_skip_16x16); mode_uv_16x16 = mbmi->uv_mode; } -#endif // CONFIG_TX32X32 } for (mode_index = 0; mode_index < MAX_MODES; @@ -4965,13 +4927,11 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, distortion_uv = dist_uv_4x4; skippable = skippable && uv_skip_4x4; mbmi->uv_mode = mode_uv_4x4; -#if CONFIG_TX32X32 } else if (mbmi->txfm_size == TX_32X32) { rate_uv = rate_uv_16x16; distortion_uv = dist_uv_16x16; skippable = skippable && uv_skip_16x16; mbmi->uv_mode = mode_uv_16x16; -#endif // CONFIG_TX32X32 } else { rate_uv = rate_uv_8x8; distortion_uv = dist_uv_8x8; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index dda81c838..fc99311ae 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -31,9 +31,7 @@ vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8]; vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8]; vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16]; vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; -#endif extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4]; extern vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4]; @@ -41,9 +39,7 @@ extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8]; extern vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8]; extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16]; extern vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16]; -#if CONFIG_TX32X32 extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; -#endif #endif /* ENTROPY_STATS */ static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; @@ -141,12 +137,10 @@ static void tokenize_b(VP9_COMP *cpi, vp9_block2left[tx_size][ib]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; -#if CONFIG_TX32X32 ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + vp9_block2above[tx_size][ib]; ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + vp9_block2left[tx_size][ib]; -#endif switch (tx_size) { @@ -195,11 +189,9 @@ static void tokenize_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; -#if CONFIG_TX32X32 } else { a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; -#endif } #endif seg_eob = 256; @@ -212,14 +204,11 @@ static 
void tokenize_b(VP9_COMP *cpi, counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; } -#if CONFIG_TX32X32 if (type == PLANE_TYPE_UV) { int uv_idx = (ib - 16) >> 2; qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx; } -#endif break; -#if CONFIG_TX32X32 case TX_32X32: #if CONFIG_CNVCONTEXT a_ec = a[0] + a[1] + a[2] + a[3] + @@ -236,7 +225,6 @@ static void tokenize_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_32x32; qcoeff_ptr = xd->sb_coeff_data.qcoeff; break; -#endif } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); @@ -294,19 +282,15 @@ static void tokenize_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a[1] = a[2] = a[3] = a_ec; l[1] = l[2] = l[3] = l_ec; -#if CONFIG_TX32X32 } else { a1[0] = a1[1] = a[1] = a_ec; l1[0] = l1[1] = l[1] = l_ec; -#endif } -#if CONFIG_TX32X32 } else if (tx_size == TX_32X32) { a[1] = a[2] = a[3] = a_ec; l[1] = l[2] = l[3] = l_ec; a1[0] = a1[1] = a1[2] = a1[3] = a_ec; l1[0] = l1[1] = l1[2] = l1[3] = l_ec; -#endif } } @@ -378,7 +362,6 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); } -#if CONFIG_TX32X32 int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { int skip = 1; skip &= !xd->block[0].eob; @@ -440,7 +423,6 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if (dry_run) *t = t_backup; } -#endif void vp9_tokenize_mb(VP9_COMP *cpi, MACROBLOCKD *xd, @@ -557,9 +539,7 @@ void init_context_counters(void) { vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16)); vpx_memset(hybrid_context_counters_16x16, 0, sizeof(hybrid_context_counters_16x16)); -#if CONFIG_TX32X32 vpx_memset(context_counters_32x32, 0, sizeof(context_counters_32x32)); -#endif } else { fread(context_counters_4x4, sizeof(context_counters_4x4), 1, f); fread(hybrid_context_counters_4x4, @@ -570,9 +550,7 @@ void init_context_counters(void) { fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f); fread(hybrid_context_counters_16x16, sizeof(hybrid_context_counters_16x16), 1, f); -#if CONFIG_TX32X32 fread(context_counters_32x32, sizeof(context_counters_32x32), 1, f); -#endif fclose(f); } @@ -587,9 +565,7 @@ void init_context_counters(void) { vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16)); vpx_memset(hybrid_tree_update_hist_16x16, 0, sizeof(hybrid_tree_update_hist_16x16)); -#if CONFIG_TX32X32 vpx_memset(tree_update_hist_32x32, 0, sizeof(tree_update_hist_32x32)); -#endif } else { fread(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f); fread(hybrid_tree_update_hist_4x4, @@ -600,9 +576,7 @@ void init_context_counters(void) { fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f); fread(hybrid_tree_update_hist_16x16, sizeof(hybrid_tree_update_hist_16x16), 1, f); -#if CONFIG_TX32X32 fread(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f); -#endif fclose(f); } } @@ -702,10 +676,8 @@ void print_context_counters() { "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]"); print_counter(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, "vp9_default_hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]"); -#if CONFIG_TX32X32 print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32, "vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]"); -#endif /* print coefficient probabilities */ print_probs(f, context_counters_4x4, BLOCK_TYPES_4X4, @@ -720,10 +692,8 @@ void print_context_counters() { "default_coef_probs_16x16[BLOCK_TYPES_16X16]"); print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16, 
"default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]"); -#if CONFIG_TX32X32 print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32, "default_coef_probs_32x32[BLOCK_TYPES_32X32]"); -#endif fclose(f); @@ -737,9 +707,7 @@ void print_context_counters() { fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f); fwrite(hybrid_context_counters_16x16, sizeof(hybrid_context_counters_16x16), 1, f); -#if CONFIG_TX32X32 fwrite(context_counters_32x32, sizeof(context_counters_32x32), 1, f); -#endif fclose(f); } #endif @@ -768,12 +736,10 @@ static __inline void stuff_b(VP9_COMP *cpi, ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; ENTROPY_CONTEXT a_ec = *a, l_ec = *l; -#if CONFIG_TX32X32 ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + vp9_block2above[tx_size][ib]; ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + vp9_block2left[tx_size][ib]; -#endif switch (tx_size) { default: @@ -808,11 +774,9 @@ static __inline void stuff_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a[2] + a[3]) != 0; l_ec = (l[0] + l[1] + l[2] + l[3]) != 0; -#if CONFIG_TX32X32 } else { a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; -#endif } #endif bands = vp9_coef_bands_16x16; @@ -824,7 +788,6 @@ static __inline void stuff_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_16x16; } break; -#if CONFIG_TX32X32 case TX_32X32: #if CONFIG_CNVCONTEXT a_ec = a[0] + a[1] + a[2] + a[3] + @@ -838,7 +801,6 @@ static __inline void stuff_b(VP9_COMP *cpi, counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; break; -#endif } VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); @@ -857,19 +819,15 @@ static __inline void stuff_b(VP9_COMP *cpi, if (type != PLANE_TYPE_UV) { a[1] = a[2] = a[3] = 0; l[1] = l[2] = l[3] = 0; -#if CONFIG_TX32X32 } else { a1[0] = a1[1] = a[1] = a_ec; l1[0] = l1[1] = l[1] = l_ec; -#endif } -#if CONFIG_TX32X32 } else if (tx_size == TX_32X32) { a[1] = a[2] = a[3] = a_ec; l[1] = l[2] = l[3] = l_ec; a1[0] = a1[1] = a1[2] = a1[3] = a_ec; l1[0] = l1[1] = l1[2] = l1[3] = l_ec; -#endif } if (!dry_run) { @@ -983,7 +941,6 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } } -#if CONFIG_TX32X32 static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { int b; @@ -1003,11 +960,8 @@ void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { *t = t_backup; } } -#endif -#if CONFIG_TX32X32 void vp9_fix_contexts_sb(MACROBLOCKD *xd) { vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); } -#endif diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 9bc756e82..3eeb8fa5a 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -36,10 +36,8 @@ extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block); extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); -#if CONFIG_TX32X32 extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); extern int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd); -#endif struct VP9_COMP; @@ -50,14 +48,10 @@ extern void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); -#if CONFIG_TX32X32 extern void 
vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); -#endif -#if CONFIG_TX32X32 extern void vp9_fix_contexts_sb(MACROBLOCKD *xd); -#endif #ifdef ENTROPY_STATS void init_context_counters(); void print_context_counters(); From 55657aac49db87d2848c1022448e3841c621e1b9 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 10 Jan 2013 08:36:42 -0800 Subject: [PATCH 58/77] Fix wrong pitch argument in dct32x32 unit test. Change-Id: Id9474a1686daebfa3d004e21823bf1888ec9e534 --- test/dct32x32_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index f0623eb32..827b13316 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -180,7 +180,7 @@ TEST(VP9Fdct32x32Test, CoeffSizeCheck) { for (int j = 0; j < 1024; ++j) input_extreme_block[j] = 255; - const int pitch = 32; + const int pitch = 64; vp9_short_fdct32x32_c(input_block, output_block, pitch); vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch); From 6c9fb22e13be7996e784038d7580444895c89895 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 10 Jan 2013 10:55:07 -0800 Subject: [PATCH 59/77] Reduce the usage of widerlpf The commit changed to not use the wider lpf within a superblock when the 32x32 transform is used for the block. The commit also changed to use the shorter version of loop filtering for the UV planes. Change-Id: I344c1fb9a3be9d1200782a788bcb0b001fedcff8 --- vp9/common/vp9_loopfilter.c | 36 ++++++++++++++++++++--------- vp9/common/vp9_loopfilter_filters.c | 8 +++---- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 0badd276c..a7973bea6 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -176,19 +176,31 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, } } +// Determine if we should skip inner-MB loop filtering within a MB. +// The current condition is that the loop filtering is skipped only +// when the MB uses a prediction size of 16x16 and either the 16x16 +// transform is used or there is no residue at all.
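+// For example, with this relaxed test a 16x16-predicted MB that coded +// no residual (mb_skip_coeff set) now skips inner-MB filtering even +// when its transform size is below 16x16.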
static int mb_lf_skip(const MB_MODE_INFO *const mbmi) { const MB_PREDICTION_MODE mode = mbmi->mode; const int skip_coef = mbmi->mb_skip_coeff; const int tx_size = mbmi->txfm_size; return mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV && - tx_size >= TX_16X16 && skip_coef; + (tx_size >= TX_16X16 || skip_coef); } + +// Determine if we should skip MB loop filtering on a MB edge within +// a superblock. The current condition is that MB loop filtering is +// skipped only when both MBs do not use inner MB loop filtering and +// share the same motion vector and reference frame. static int sb_mb_lf_skip(const MODE_INFO *const mip0, const MODE_INFO *const mip1) { - return mb_lf_skip(&mip0->mbmi) && - mb_lf_skip(&mip1->mbmi) && - mip0->mbmi.txfm_size >= TX_32X32 && - mip0->mbmi.ref_frame; + const MB_MODE_INFO *mbmi0 = &mip0->mbmi; + const MB_MODE_INFO *mbmi1 = &mip1->mbmi; + return mb_lf_skip(mbmi0) && mb_lf_skip(mbmi1) && + (mbmi0->ref_frame == mbmi1->ref_frame) && + (mbmi0->mv[mbmi0->ref_frame].as_int == + mbmi1->mv[mbmi1->ref_frame].as_int) && + mbmi0->ref_frame != INTRA_FRAME; } void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd, @@ -235,9 +247,10 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; - if (mb_col > 0 - && !((mb_col & 1) && mode_info_context->mbmi.sb_type && - sb_mb_lf_skip(mode_info_context - 1, mode_info_context)) + if (mb_col > 0 && + !((mb_col & 1) && mode_info_context->mbmi.sb_type && + (sb_mb_lf_skip(mode_info_context - 1, mode_info_context) || + tx_size >= TX_32X32)) ) { #if CONFIG_WIDERLPF if (tx_size >= TX_16X16) @@ -258,9 +271,10 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, } /* don't apply across umv border */ - if (mb_row > 0 - && !((mb_row & 1) && mode_info_context->mbmi.sb_type && - sb_mb_lf_skip(mode_info_context - mis, mode_info_context)) + if (mb_row > 0 && + !((mb_row & 1) && mode_info_context->mbmi.sb_type && + (sb_mb_lf_skip(mode_info_context - mis, mode_info_context) || + tx_size >= TX_32X32)) ) { #if CONFIG_WIDERLPF if (tx_size >= TX_16X16) diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index c73e37ba3..18438270f 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -682,11 +682,11 @@ void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp9_mb_lpf_vertical_edge_w(u_ptr, uv_stride, + vp9_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) - vp9_mb_lpf_vertical_edge_w(v_ptr, uv_stride, + vp9_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr, @@ -696,11 +696,11 @@ void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp9_mb_lpf_horizontal_edge_w(u_ptr, uv_stride, + vp9_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) - vp9_mb_lpf_horizontal_edge_w(v_ptr, uv_stride, + vp9_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } From 9431536045ea205ed781a830fa40501bb423c572 Mon Sep 17 00:00:00 2001 From: Jim Bankoski Date: Fri, 11 Jan 2013 09:45:45 -0800 Subject: [PATCH 60/77] rtcd for new wider loop filters Change-Id: I8826bcdcf72ba6d86bde31cd13902a710399805c --- vp9/common/vp9_loopfilter_filters.c | 4 ++-- vp9/common/vp9_rtcd_defs.sh | 5 +++++ 2
files changed, 7 insertions(+), 2 deletions(-) diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index c73e37ba3..5df093307 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -675,7 +675,7 @@ void vp9_mb_lpf_vertical_edge_w } while (++i < count * 8); } -void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr, +void vp9_lpf_mbv_w_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { vp9_mb_lpf_vertical_edge_w(y_ptr, y_stride, @@ -689,7 +689,7 @@ void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr, vp9_mb_lpf_vertical_edge_w(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } -void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr, +void vp9_lpf_mbh_w_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { vp9_mb_lpf_horizontal_edge_w(y_ptr, y_stride, diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 004d4f5b9..33ed5a490 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -233,6 +233,11 @@ vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2 +prototype void vp9_lpf_mbh_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi" +specialize vp9_lpf_mbh_w + +prototype void vp9_lpf_mbv_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi" +specialize vp9_lpf_mbv_w # # post proc # From d27ae620bce34ebb4a78502ad7b10d14f491b216 Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Fri, 11 Jan 2013 19:41:39 +0000 Subject: [PATCH 61/77] Remove INT64_MAX references. Replace INT64_MAX references with LLONG_MAX for windows build. Change-Id: Ib8b45c1e9c15c043b2f54c27ed83b8682b2be34f --- vp9/encoder/vp9_rdopt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7d2c489d5..a82670b6e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1362,7 +1362,7 @@ static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int this_rate, this_rate_tokenonly; int this_distortion, s; - int64_t best_rd = INT64_MAX, this_rd; + int64_t best_rd = LLONG_MAX, this_rd; /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -2191,7 +2191,7 @@ static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, int *skippable) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = INT64_MAX, this_rd; + int64_t best_rd = LLONG_MAX, this_rd; int this_rate_tokenonly, this_rate; int this_distortion, s; From 4987c0f07e1791099fe87199b27a2804079f62f0 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Fri, 11 Jan 2013 14:54:14 -0800 Subject: [PATCH 62/77] Initial sse2 version of the wide loopfilters Updated the rtcd_defs and used the sse2 uv version of the loopfilter. The performance improved by ~8% for the test clip used. 
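For reference, the prototype/specialize pair in vp9_rtcd_defs.sh expands into a run-time-dispatched function pointer in the generated vp9_rtcd.h. The sketch below is illustrative only; the exact shape of the generated code (the RTCD_EXTERN plumbing and setup_rtcd_internal) is assumed from other generated prototypes, not copied from the build output:

    void vp9_lpf_mbh_w_c(unsigned char *y_ptr, unsigned char *u_ptr,
                         unsigned char *v_ptr, int y_stride, int uv_stride,
                         struct loop_filter_info *lfi);
    void vp9_lpf_mbh_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
                            unsigned char *v_ptr, int y_stride, int uv_stride,
                            struct loop_filter_info *lfi);
    RTCD_EXTERN void (*vp9_lpf_mbh_w)(unsigned char *y_ptr,
                                      unsigned char *u_ptr,
                                      unsigned char *v_ptr, int y_stride,
                                      int uv_stride,
                                      struct loop_filter_info *lfi);

    static void setup_rtcd_internal(void) {
      int flags = x86_simd_caps();
      vp9_lpf_mbh_w = vp9_lpf_mbh_w_c;       /* C fallback */
      if (flags & HAS_SSE2)
        vp9_lpf_mbh_w = vp9_lpf_mbh_w_sse2;  /* picked when SSE2 is present */
    }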
Change-Id: I5a0bca3b6674198d40ca4a77b8cc722ddde79c36 --- vp9/common/vp9_loopfilter.h | 17 +++++++++++------ vp9/common/vp9_loopfilter_filters.c | 2 +- vp9/common/vp9_rtcd_defs.sh | 7 +++++-- vp9/common/x86/vp9_loopfilter_x86.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index 8b752aa5c..53ec336a4 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -92,10 +92,15 @@ void vp9_loop_filter_partial_frame(struct VP9Common *cm, void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl); -extern void vp9_lpf_mbv_w(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi); -extern void vp9_lpf_mbh_w(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - struct loop_filter_info *lfi); +void vp9_mb_lpf_horizontal_edge_w(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count); + +void vp9_mb_lpf_vertical_edge_w(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, + int count); #endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index 18f0fea13..ed7b422df 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -596,7 +596,7 @@ static __inline void wide_mbfilter(int8_t mask, uint8_t hev, } } -static void vp9_mb_lpf_horizontal_edge_w +void vp9_mb_lpf_horizontal_edge_w ( unsigned char *s, int p, diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 33ed5a490..d8517bbfa 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -233,11 +233,14 @@ vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2 +if [ "$CONFIG_WIDERLPF" = "yes" ]; then prototype void vp9_lpf_mbh_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_lpf_mbh_w +specialize vp9_lpf_mbh_w sse2 prototype void vp9_lpf_mbv_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi" -specialize vp9_lpf_mbv_w +specialize vp9_lpf_mbv_w sse2 +fi + # # post proc # diff --git a/vp9/common/x86/vp9_loopfilter_x86.c b/vp9/common/x86/vp9_loopfilter_x86.c index 61b1c77da..9f46eec4c 100644 --- a/vp9/common/x86/vp9_loopfilter_x86.c +++ b/vp9/common/x86/vp9_loopfilter_x86.c @@ -604,6 +604,20 @@ void vp9_loop_filter_mbh_sse2(unsigned char *y_ptr, lfi->lim, lfi->hev_thr, v_ptr); } +#if CONFIG_WIDERLPF +void vp9_lpf_mbh_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + struct loop_filter_info *lfi) { + vp9_mb_lpf_horizontal_edge_w(y_ptr, y_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + /* u,v */ + if (u_ptr) + vp9_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, v_ptr); +} +#endif + void vp9_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { @@ -624,6 +638,20 @@ void vp9_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, 
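The frames are 4:2:0, so the chroma planes have half the luma resolution in each direction: the inner 8-pixel Y boundary corresponds to a 4-pixel offset in U and V, which is where the "+ 4 * uv_stride" and "+ 4" offsets in the diff below come from. A minimal sketch of the mapping, assuming 4:2:0 subsampling:

    /* Luma edge offset -> chroma edge offset under 4:2:0 (illustrative):
       the Y edge at y_ptr + 8 * y_stride lands at u_ptr + 4 * uv_stride. */
    static int uv_offset_from_y(int y_offset) {
      return y_offset >> 1;
    }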
lfi->lim, lfi->hev_thr, v_ptr); } +#if CONFIG_WIDERLPF +void vp9_lpf_mbv_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr, int y_stride, int uv_stride, + struct loop_filter_info *lfi) { + vp9_mb_lpf_vertical_edge_w(y_ptr, y_stride, + lfi->mblim, lfi->lim, lfi->hev_thr, 2); + + /* u,v */ + if (u_ptr) + vp9_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, + lfi->lim, lfi->hev_thr, v_ptr); +} +#endif + void vp9_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { From bc45f2319299913cce714be97db8297ca6ee639e Mon Sep 17 00:00:00 2001 From: Frank Galligan Date: Fri, 11 Jan 2013 15:34:05 -0800 Subject: [PATCH 63/77] Upstream changes from Chromium Android Clang build. See https://codereview.chromium.org/11875006/ Change-Id: Ied2a17df2b3222635f84aef120eaa9feb53750d2 --- vp8/encoder/onyx_if.c | 2 +- vp8/vp8_cx_iface.c | 4 +++- vp9/vp9_dx_iface.c | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 6858d411d..7eb7193bf 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -3466,7 +3466,7 @@ static void encode_frame_to_data_rate /* Note that we should not throw out a key frame (especially when * spatial resampling is enabled). */ - if ((cm->frame_type == KEY_FRAME)) + if (cm->frame_type == KEY_FRAME) { cpi->decimation_count = cpi->decimation_factor; } diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 83eecba5c..b985cb1b7 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -1178,7 +1178,9 @@ static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx, { int res; vpx_scaling_mode_t scalemode = *(vpx_scaling_mode_t *)data ; - res = vp8_set_internal_size(ctx->cpi, scalemode.h_scaling_mode, scalemode.v_scaling_mode); + res = vp8_set_internal_size(ctx->cpi, + (VPX_SCALING)scalemode.h_scaling_mode, + (VPX_SCALING)scalemode.v_scaling_mode); if (!res) { diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 321fdb9e0..2d7e41369 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -573,7 +573,8 @@ static vpx_codec_err_t vp9_set_reference(vpx_codec_alg_priv_t *ctx, image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(ctx->pbi, frame->frame_type, &sd); + return vp9_set_reference_dec(ctx->pbi, + (VP9_REFFRAME)frame->frame_type, &sd); } else return VPX_CODEC_INVALID_PARAM; @@ -591,7 +592,8 @@ static vpx_codec_err_t vp9_get_reference(vpx_codec_alg_priv_t *ctx, image2yuvconfig(&frame->img, &sd); - return vp9_get_reference_dec(ctx->pbi, frame->frame_type, &sd); + return vp9_get_reference_dec(ctx->pbi, + (VP9_REFFRAME)frame->frame_type, &sd); } else return VPX_CODEC_INVALID_PARAM; From 9a1d73d036d06ef4332ec6d0ff18f771843cd22d Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Fri, 11 Jan 2013 15:53:32 -0800 Subject: [PATCH 64/77] Add loop filtering for UV plane On block boundary within a MB when 8x8 block boundary only is filtered for Y. 
Change-Id: Ie1c804c877d199e78e2fecd8c2d3f1e114ce9ec1 --- vp9/common/vp9_loopfilter_filters.c | 16 ++++++++++++++++ vp9/common/x86/vp9_loopfilter_x86.c | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index 18f0fea13..adb10750e 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -439,6 +439,14 @@ void vp9_loop_filter_bh8x8_c(uint8_t *y_ptr, uint8_t *u_ptr, struct loop_filter_info *lfi) { vp9_mbloop_filter_horizontal_edge_c( y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp9_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, + lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp9_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, + lfi->blim, lfi->lim, lfi->hev_thr, 1); } void vp9_loop_filter_bhs_c(uint8_t *y_ptr, int y_stride, @@ -456,6 +464,14 @@ void vp9_loop_filter_bv8x8_c(uint8_t *y_ptr, uint8_t *u_ptr, struct loop_filter_info *lfi) { vp9_mbloop_filter_vertical_edge_c( y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + + if (u_ptr) + vp9_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, + lfi->blim, lfi->lim, lfi->hev_thr, 1); + + if (v_ptr) + vp9_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, + lfi->blim, lfi->lim, lfi->hev_thr, 1); } void vp9_loop_filter_bvs_c(uint8_t *y_ptr, int y_stride, diff --git a/vp9/common/x86/vp9_loopfilter_x86.c b/vp9/common/x86/vp9_loopfilter_x86.c index 61b1c77da..1ec7d5c48 100644 --- a/vp9/common/x86/vp9_loopfilter_x86.c +++ b/vp9/common/x86/vp9_loopfilter_x86.c @@ -609,6 +609,11 @@ void vp9_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, struct loop_filter_info *lfi) { vp9_mbloop_filter_horizontal_edge_sse2( y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); + + if (u_ptr) + vp9_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, + lfi->blim, lfi->lim, lfi->hev_thr, + v_ptr + 4 * uv_stride); } /* Vertical MB Filtering */ @@ -629,6 +634,11 @@ void vp9_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, struct loop_filter_info *lfi) { vp9_mbloop_filter_vertical_edge_sse2( y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); + + if (u_ptr) + vp9_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, + lfi->blim, lfi->lim, lfi->hev_thr, + v_ptr + 4); } /* Horizontal B Filtering */ From b20ce07d76d9b8fa2230bfccdd11324b565e39c0 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Fri, 11 Jan 2013 17:11:04 -0800 Subject: [PATCH 65/77] WIP: Added sse2 version of vp9_mb_lpf_horizontal_edge_w and vp9_mb_lpf_vertical_edge_w_sse2. This was quickly done so we can run some tests over the weekend. Future commits will optimize/refactor these functions further. The decoder performance improved by ~17% for the clip used. 
Change-Id: I612687cd5a7670ee840a0cbc3c68dc2b84d4af76 --- vp9/common/x86/vp9_loopfilter_x86.c | 517 +++++++++++++++++++++++++++- 1 file changed, 513 insertions(+), 4 deletions(-) diff --git a/vp9/common/x86/vp9_loopfilter_x86.c b/vp9/common/x86/vp9_loopfilter_x86.c index 9f46eec4c..3cf10ba20 100644 --- a/vp9/common/x86/vp9_loopfilter_x86.c +++ b/vp9/common/x86/vp9_loopfilter_x86.c @@ -85,6 +85,482 @@ void vp9_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, #endif #if HAVE_SSE2 + +#if CONFIG_WIDERLPF +void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s, + int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { + DECLARE_ALIGNED(16, unsigned char, flat2_op[7][16]); + DECLARE_ALIGNED(16, unsigned char, flat2_oq[7][16]); + + DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); + DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); + __m128i mask, hev, flat, flat2; + const __m128i zero = _mm_set1_epi16(0); + __m128i p7, p6, p5; + __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; + __m128i q5, q6, q7; + int i = 0; + const unsigned int extended_thresh = _thresh[0] * 0x01010101u; + const unsigned int extended_limit = _limit[0] * 0x01010101u; + const unsigned int extended_blimit = _blimit[0] * 0x01010101u; + const __m128i thresh = + _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0); + const __m128i limit = + _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0); + const __m128i blimit = + _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0); + + p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); + p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + q4 = _mm_loadu_si128((__m128i *)(s + 4 * p)); + { + const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), + _mm_subs_epu8(p0, p1)); + const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), + _mm_subs_epu8(q0, q1)); + const __m128i one = _mm_set1_epi8(1); + const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); + __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), + _mm_subs_epu8(q0, p0)); + __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), + _mm_subs_epu8(q1, p1)); + __m128i work; + flat = _mm_max_epu8(abs_p1p0, abs_q1q0); + hev = _mm_subs_epu8(flat, thresh); + hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); + + abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); + abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); + // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; + mask = _mm_max_epu8(flat, mask); + // mask |= (abs(p1 - p0) > limit) * -1; + // mask |= (abs(q1 - q0) > limit) * -1; + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), + _mm_subs_epu8(p1, p2)), + _mm_or_si128(_mm_subs_epu8(p3, p2), + _mm_subs_epu8(p2, p3))); + mask = _mm_max_epu8(work, mask); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), + _mm_subs_epu8(q1, q2)), + 
_mm_or_si128(_mm_subs_epu8(q3, q2), + _mm_subs_epu8(q2, q3))); + mask = _mm_max_epu8(work, mask); + mask = _mm_subs_epu8(mask, limit); + mask = _mm_cmpeq_epi8(mask, zero); + + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0), + _mm_subs_epu8(p0, p2)), + _mm_or_si128(_mm_subs_epu8(q2, q0), + _mm_subs_epu8(q0, q2))); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0), + _mm_subs_epu8(p0, p3)), + _mm_or_si128(_mm_subs_epu8(q3, q0), + _mm_subs_epu8(q0, q3))); + flat = _mm_max_epu8(work, flat); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0), + _mm_subs_epu8(p0, p4)), + _mm_or_si128(_mm_subs_epu8(q4, q0), + _mm_subs_epu8(q0, q4))); + flat = _mm_max_epu8(work, flat); + flat = _mm_subs_epu8(flat, one); + flat = _mm_cmpeq_epi8(flat, zero); + flat = _mm_and_si128(flat, mask); + } + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // calculate flat2 + p4 = _mm_loadu_si128((__m128i *)(s - 8 * p)); + p3 = _mm_loadu_si128((__m128i *)(s - 7 * p)); + p2 = _mm_loadu_si128((__m128i *)(s - 6 * p)); + p1 = _mm_loadu_si128((__m128i *)(s - 5 * p)); +// p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); +// q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); + q1 = _mm_loadu_si128((__m128i *)(s + 4 * p)); + q2 = _mm_loadu_si128((__m128i *)(s + 5 * p)); + q3 = _mm_loadu_si128((__m128i *)(s + 6 * p)); + q4 = _mm_loadu_si128((__m128i *)(s + 7 * p)); + + { + const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), + _mm_subs_epu8(p0, p1)); + const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), + _mm_subs_epu8(q0, q1)); + const __m128i one = _mm_set1_epi8(1); + __m128i work; + flat2 = _mm_max_epu8(abs_p1p0, abs_q1q0); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0), + _mm_subs_epu8(p0, p2)), + _mm_or_si128(_mm_subs_epu8(q2, q0), + _mm_subs_epu8(q0, q2))); + flat2 = _mm_max_epu8(work, flat2); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0), + _mm_subs_epu8(p0, p3)), + _mm_or_si128(_mm_subs_epu8(q3, q0), + _mm_subs_epu8(q0, q3))); + flat2 = _mm_max_epu8(work, flat2); + work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0), + _mm_subs_epu8(p0, p4)), + _mm_or_si128(_mm_subs_epu8(q4, q0), + _mm_subs_epu8(q0, q4))); + flat2 = _mm_max_epu8(work, flat2); + flat2 = _mm_subs_epu8(flat2, one); + flat2 = _mm_cmpeq_epi8(flat2, zero); + flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask + } + // calculate flat2 + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + { + const __m128i four = _mm_set1_epi16(4); + unsigned char *src = s; + i = 0; + do { + __m128i workp_a, workp_b, workp_shft; + p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero); + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero); + + workp_a = _mm_add_epi16(_mm_add_epi16(p4, p3), _mm_add_epi16(p2, p1)); + workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); + workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p4); + workp_shft = 
_mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op2[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op1[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q2); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_op0[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq0[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q4); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq1[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q4); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); + _mm_storel_epi64((__m128i *)&flat_oq2[i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + src += 8; + } while (++i < 2); + } + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // wide flat + // TODO(slavarnway): interleave with the flat pixel calculations (see above) + { + const __m128i eight = _mm_set1_epi16(8); + unsigned char *src = s; + int i = 0; + do { + __m128i workp_a, workp_b, workp_shft; + p7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 8 * p)), zero); + p6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 7 * p)), zero); + p5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 6 * p)), zero); + p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero); + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero); + q5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 5 * p)), zero); + q6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 6 * p)), zero); + q7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 7 * p)), zero); + + + workp_a = _mm_sub_epi16(_mm_slli_epi16(p7, 3), p7); // p7 * 7 + workp_a = _mm_add_epi16(_mm_slli_epi16(p6, 1), workp_a); + workp_b = _mm_add_epi16(_mm_add_epi16(p5, p4), _mm_add_epi16(p3, p2)); + workp_a = _mm_add_epi16(_mm_add_epi16(p1, p0), workp_a); + workp_b = _mm_add_epi16(_mm_add_epi16(q0, eight), workp_b); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[6][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p5); + workp_b = 
_mm_add_epi16(_mm_sub_epi16(workp_b, p6), q1); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[5][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p4); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p5), q2); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[4][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p4), q3); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[3][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p2); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p3), q4); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[2][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p1); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p2), q5); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[1][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p0); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), q6); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_op[0][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), q0); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[0][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p6), q1); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[1][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p5), q2); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[2][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q3); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q2), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[3][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q4); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q3), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[4][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q5); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q4), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[5][i*8], + _mm_packus_epi16(workp_shft, workp_shft)); + + workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q6); + workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q5), q7); + workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4); + _mm_storel_epi64((__m128i *)&flat2_oq[6][i*8], + 
_mm_packus_epi16(workp_shft, workp_shft)); + + src += 8; + } while (++i < 2); + } + // wide flat + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + // lp filter + { + const __m128i t4 = _mm_set1_epi8(4); + const __m128i t3 = _mm_set1_epi8(3); + const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t1f = _mm_set1_epi8(0x1f); + const __m128i t1 = _mm_set1_epi8(0x1); + const __m128i t7f = _mm_set1_epi8(0x7f); + + __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), + t80); + __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), + t80); + __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), + t80); + __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), + t80); + __m128i filt; + __m128i work_a; + __m128i filter1, filter2; + + filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); + work_a = _mm_subs_epi8(qs0, ps0); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + filt = _mm_adds_epi8(filt, work_a); + /* (vp9_filter + 3 * (qs0 - ps0)) & mask */ + filt = _mm_and_si128(filt, mask); + + filter1 = _mm_adds_epi8(filt, t4); + filter2 = _mm_adds_epi8(filt, t3); + + /* Filter1 >> 3 */ + work_a = _mm_cmpgt_epi8(zero, filter1); + filter1 = _mm_srli_epi16(filter1, 3); + work_a = _mm_and_si128(work_a, te0); + filter1 = _mm_and_si128(filter1, t1f); + filter1 = _mm_or_si128(filter1, work_a); + + /* Filter2 >> 3 */ + work_a = _mm_cmpgt_epi8(zero, filter2); + filter2 = _mm_srli_epi16(filter2, 3); + work_a = _mm_and_si128(work_a, te0); + filter2 = _mm_and_si128(filter2, t1f); + filter2 = _mm_or_si128(filter2, work_a); + + /* filt >> 1 */ + filt = _mm_adds_epi8(filter1, t1); + work_a = _mm_cmpgt_epi8(zero, filt); + filt = _mm_srli_epi16(filt, 1); + work_a = _mm_and_si128(work_a, t80); + filt = _mm_and_si128(filt, t7f); + filt = _mm_or_si128(filt, work_a); + + filt = _mm_andnot_si128(hev, filt); + + ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); + ps1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); + qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); + qs1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); + + // write out op6 - op3 + { + unsigned char *dst = (s - 7 * p); + for (i = 6; i > 2; i--) { + __m128i flat2_output; + work_a = _mm_loadu_si128((__m128i *)dst); + flat2_output = _mm_load_si128((__m128i *)flat2_op[i]); + work_a = _mm_andnot_si128(flat2, work_a); + flat2_output = _mm_and_si128(flat2, flat2_output); + work_a = _mm_or_si128(work_a, flat2_output); + _mm_storeu_si128((__m128i *)dst, work_a); + dst += p; + } + } + + work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + p2 = _mm_load_si128((__m128i *)flat_op2); + work_a = _mm_andnot_si128(flat, work_a); + p2 = _mm_and_si128(flat, p2); + work_a = _mm_or_si128(work_a, p2); + p2 = _mm_load_si128((__m128i *)flat2_op[2]); + work_a = _mm_andnot_si128(flat2, work_a); + p2 = _mm_and_si128(flat2, p2); + p2 = _mm_or_si128(work_a, p2); + _mm_storeu_si128((__m128i *)(s - 3 * p), p2); + + p1 = _mm_load_si128((__m128i *)flat_op1); + work_a = _mm_andnot_si128(flat, ps1); + p1 = _mm_and_si128(flat, p1); + work_a = _mm_or_si128(work_a, p1); + p1 = _mm_load_si128((__m128i *)flat2_op[1]); + work_a = _mm_andnot_si128(flat2, work_a); + p1 = _mm_and_si128(flat2, p1); + p1 = _mm_or_si128(work_a, p1); + _mm_storeu_si128((__m128i *)(s - 2 * p), p1); + + p0 = _mm_load_si128((__m128i *)flat_op0); + work_a = _mm_andnot_si128(flat, ps0); + p0 = _mm_and_si128(flat, p0); + work_a = _mm_or_si128(work_a, p0); + p0 = 
_mm_load_si128((__m128i *)flat2_op[0]); + work_a = _mm_andnot_si128(flat2, work_a); + p0 = _mm_and_si128(flat2, p0); + p0 = _mm_or_si128(work_a, p0); + _mm_storeu_si128((__m128i *)(s - 1 * p), p0); + + q0 = _mm_load_si128((__m128i *)flat_oq0); + work_a = _mm_andnot_si128(flat, qs0); + q0 = _mm_and_si128(flat, q0); + work_a = _mm_or_si128(work_a, q0); + q0 = _mm_load_si128((__m128i *)flat2_oq[0]); + work_a = _mm_andnot_si128(flat2, work_a); + q0 = _mm_and_si128(flat2, q0); + q0 = _mm_or_si128(work_a, q0); + _mm_storeu_si128((__m128i *)(s - 0 * p), q0); + + q1 = _mm_load_si128((__m128i *)flat_oq1); + work_a = _mm_andnot_si128(flat, qs1); + q1 = _mm_and_si128(flat, q1); + work_a = _mm_or_si128(work_a, q1); + q1 = _mm_load_si128((__m128i *)flat2_oq[1]); + work_a = _mm_andnot_si128(flat2, work_a); + q1 = _mm_and_si128(flat2, q1); + q1 = _mm_or_si128(work_a, q1); + _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + + work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + q2 = _mm_load_si128((__m128i *)flat_oq2); + work_a = _mm_andnot_si128(flat, work_a); + q2 = _mm_and_si128(flat, q2); + work_a = _mm_or_si128(work_a, q2); + q2 = _mm_load_si128((__m128i *)flat2_oq[2]); + work_a = _mm_andnot_si128(flat2, work_a); + q2 = _mm_and_si128(flat2, q2); + q2 = _mm_or_si128(work_a, q2); + _mm_storeu_si128((__m128i *)(s + 2 * p), q2); + + // write out oq3 - oq7 + { + unsigned char *dst = (s + 3 * p); + for (i = 3; i < 7; i++) { + __m128i flat2_output; + work_a = _mm_loadu_si128((__m128i *)dst); + flat2_output = _mm_load_si128((__m128i *)flat2_oq[i]); + work_a = _mm_andnot_si128(flat2, work_a); + flat2_output = _mm_and_si128(flat2, flat2_output); + work_a = _mm_or_si128(work_a, flat2_output); + _mm_storeu_si128((__m128i *)dst, work_a); + dst += p; + } + } + } +} +#endif + void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s, int p, const unsigned char *_blimit, @@ -562,6 +1038,39 @@ void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, transpose(src, 16, dst, p, 2); } +#if CONFIG_WIDERLPF +void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, + int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { + DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256); + unsigned char *src[4]; + unsigned char *dst[4]; + + /* Transpose 16x16 */ + transpose8x16(s - 8, s - 8 + p * 8, p, t_dst, 16); + transpose8x16(s, s + p * 8, p, t_dst + 16 * 8, 16); + + /* Loop filtering */ + vp9_mb_lpf_horizontal_edge_w_sse2(t_dst + 8 * 16, 16, blimit, limit, + thresh); + + src[0] = t_dst; + src[1] = t_dst + 8 * 16; + src[2] = t_dst + 8; + src[3] = t_dst + 8 * 16 + 8; + + dst[0] = s - 8; + dst[1] = s - 8 + 8; + dst[2] = s - 8 + p * 8; + dst[3] = s - 8 + p * 8 + 8; + + /* Transpose 16x16 */ + transpose(src, 16, dst, p, 4); +} +#endif + void vp9_mbloop_filter_vertical_edge_uv_sse2(unsigned char *u, int p, const unsigned char *blimit, @@ -608,8 +1117,8 @@ void vp9_loop_filter_mbh_sse2(unsigned char *y_ptr, void vp9_lpf_mbh_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { - vp9_mb_lpf_horizontal_edge_w(y_ptr, y_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 2); + vp9_mb_lpf_horizontal_edge_w_sse2(y_ptr, y_stride, + lfi->mblim, lfi->lim, lfi->hev_thr); /* u,v */ if (u_ptr) @@ -642,8 +1151,8 @@ void vp9_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, void vp9_lpf_mbv_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info 
*lfi) { - vp9_mb_lpf_vertical_edge_w(y_ptr, y_stride, - lfi->mblim, lfi->lim, lfi->hev_thr, 2); + vp9_mb_lpf_vertical_edge_w_sse2(y_ptr, y_stride, + lfi->mblim, lfi->lim, lfi->hev_thr); /* u,v */ if (u_ptr)
From 516db21c2c903a7d9b0b5fc156277e9bb538ade9 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Wed, 9 Jan 2013 06:26:54 -0800 Subject: [PATCH 66/77] Further enhancements/fixes on dct/dwt hybrid txfm Fixes some scaling issues. Adds an option to compute the dct only on the low-low subband for 32x32 and 64x64 blocks, using a single 16x16 dct after 1 and 2 wavelet decomposition levels respectively. Also adds an option to use an 8x8 dct as the building block. Currently, with the 2/6 filter and a single 16x16 dct on the low-low band, the results compared to the full 32x32 dct are as follows: derf: -0.15% yt: -0.29% std-hd: -0.18% hd: -0.6% These are my current recommended settings, since the 2/6 filter is very simple. Results with the 8x8 dct are about 0.3% worse. Change-Id: I00100cdc96e32deced591985785ef0d06f325e44 --- vp9/common/vp9_blockd.h | 12 ++ vp9/common/vp9_entropy.c | 412 ++++++++++++++++++++++++++++++++++++++- vp9/common/vp9_idctllm.c | 220 ++++++++++++++++++--- vp9/encoder/vp9_dct.c | 204 ++++++++++++++++--- 4 files changed, 795 insertions(+), 53 deletions(-)
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b5f9bd2c4..18c2ae0a8 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -47,6 +47,18 @@ void vpx_log(const char *format, ...); #define MAX_MV_REFS 9 #define MAX_MV_REF_CANDIDATES 4 +#if CONFIG_DWTDCTHYBRID +#define DWT_MAX_LENGTH 64 +#define DWT_TYPE 26 // 26/53/97 +#define DWT_PRECISION_BITS 2 +#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) + +#define DWTDCT16X16 0 +#define DWTDCT16X16_LEAN 1 +#define DWTDCT8X8 2 +#define DWTDCT_TYPE DWTDCT16X16_LEAN +#endif + typedef struct { int r, c; } POS;
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 2200cc3f9..352e17c0c 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -70,7 +70,8 @@ DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = { 12, 13, 14, 15 }; -DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, +DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -143,6 +144,214 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = { }; #if CONFIG_DWTDCTHYBRID + +#if DWTDCT_TYPE == DWTDCT16X16_LEAN +DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { + 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 1, 32, 64, 33, 2, 3, 34, + 65, 96, 128, 97, 66, 35, 4, 5, + 36, 67, 98, 129, 160, 192, 161, 130, + 99, 68, 37, 6, 7, 38, 69, 100, + 131, 162, 193, 224, 256, 225, 194, 163, + 132, 101, 70, 39, 8, 9, 40, 71, + 102, 133, 164, 195, 226, 257, 288, 320, + 289, 258, 227, 196, 165, 134, 103, 72, + 41, 10, 11, 42, 73, 104, 135, 166, + 197, 228, 259, 290, 321, 352, 384, 353, + 322, 291, 260, 229, 198, 167, 136, 105, + 74, 43, 12, 13, 44, 75, 106, 137, + 168, 199, 230, 261, 292, 323, 354, 385, + 416, 448, 417, 386, 355, 324, 293, 262, + 231, 200, 169, 138, 107, 76, 45, 14, + 15, 46, 77, 108, 139, 170, 201, 232, + 263, 294, 325, 356, 387, 418, 449, 480, + 481, 450, 419, 388, 357, 326, 295, 264, + 233, 202, 171, 140, 109, 78, 47, 79, + 110, 141, 172, 203, 234, 265, 296, 327, + 358, 389, 420, 451, 482, 483, 452, 421, + 390, 359, 328, 297, 266, 235, 204, 173, + 142, 111, 143, 174, 205, 236, 267, 298, + 329, 360, 391, 422, 453, 484, 485, 454, + 423, 392, 361, 330, 299, 268, 237, 206, + 175, 207, 238, 269, 300, 331, 362, 393, + 424, 455, 486, 487, 456, 425, 394, 363, + 332, 301, 270, 239, 271, 302, 333, 364, + 395, 426, 457, 488, 489, 458, 427, 396, + 365, 334, 303, 335, 366, 397, 428, 459, + 490, 491, 460, 429, 398, 
367, 399, 430, + 461, 492, 493, 462, 431, 463, 494, 495, + + 16, 512, 528, 17, 513, 529, 48, 544, + 560, 80, 576, 592, 49, 545, 561, 18, + 514, 530, 19, 515, 531, 50, 546, 562, + 81, 577, 593, 112, 608, 624, 144, 640, + 656, 113, 609, 625, 82, 578, 594, 51, + 547, 563, 20, 516, 532, 21, 517, 533, + 52, 548, 564, 83, 579, 595, 114, 610, + 626, 145, 641, 657, 176, 672, 688, 208, + 704, 720, 177, 673, 689, 146, 642, 658, + 115, 611, 627, 84, 580, 596, 53, 549, + 565, 22, 518, 534, 23, 519, 535, 54, + 550, 566, 85, 581, 597, 116, 612, 628, + 147, 643, 659, 178, 674, 690, 209, 705, + 721, 240, 736, 752, 272, 768, 784, 241, + 737, 753, 210, 706, 722, 179, 675, 691, + 148, 644, 660, 117, 613, 629, 86, 582, + 598, 55, 551, 567, 24, 520, 536, 25, + 521, 537, 56, 552, 568, 87, 583, 599, + 118, 614, 630, 149, 645, 661, 180, 676, + 692, 211, 707, 723, 242, 738, 754, 273, + 769, 785, 304, 800, 816, 336, 832, 848, + 305, 801, 817, 274, 770, 786, 243, 739, + 755, 212, 708, 724, 181, 677, 693, 150, + 646, 662, 119, 615, 631, 88, 584, 600, + 57, 553, 569, 26, 522, 538, 27, 523, + 539, 58, 554, 570, 89, 585, 601, 120, + 616, 632, 151, 647, 663, 182, 678, 694, + 213, 709, 725, 244, 740, 756, 275, 771, + 787, 306, 802, 818, 337, 833, 849, 368, + 864, 880, 400, 896, 912, 369, 865, 881, + 338, 834, 850, 307, 803, 819, 276, 772, + 788, 245, 741, 757, 214, 710, 726, 183, + + 679, 695, 152, 648, 664, 121, 617, 633, + 90, 586, 602, 59, 555, 571, 28, 524, + 540, 29, 525, 541, 60, 556, 572, 91, + 587, 603, 122, 618, 634, 153, 649, 665, + 184, 680, 696, 215, 711, 727, 246, 742, + 758, 277, 773, 789, 308, 804, 820, 339, + 835, 851, 370, 866, 882, 401, 897, 913, + 432, 928, 944, 464, 960, 976, 433, 929, + 945, 402, 898, 914, 371, 867, 883, 340, + 836, 852, 309, 805, 821, 278, 774, 790, + 247, 743, 759, 216, 712, 728, 185, 681, + 697, 154, 650, 666, 123, 619, 635, 92, + 588, 604, 61, 557, 573, 30, 526, 542, + 31, 527, 543, 62, 558, 574, 93, 589, + 605, 124, 620, 636, 155, 651, 667, 186, + 682, 698, 217, 713, 729, 248, 744, 760, + 279, 775, 791, 310, 806, 822, 341, 837, + 853, 372, 868, 884, 403, 899, 915, 434, + 930, 946, 465, 961, 977, 496, 992, 1008, + 497, 993, 1009, 466, 962, 978, 435, 931, + 947, 404, 900, 916, 373, 869, 885, 342, + 838, 854, 311, 807, 823, 280, 776, 792, + 249, 745, 761, 218, 714, 730, 187, 683, + 699, 156, 652, 668, 125, 621, 637, 94, + 590, 606, 63, 559, 575, 95, 591, 607, + 126, 622, 638, 157, 653, 669, 188, 684, + 700, 219, 715, 731, 250, 746, 762, 281, + 777, 793, 312, 808, 824, 343, 839, 855, + 374, 870, 886, 405, 901, 917, 436, 932, + 948, 467, 963, 979, 498, 994, 1010, 499, + 995, 1011, 468, 964, 980, 437, 933, 949, + 406, 902, 918, 375, 871, 887, 344, 840, + + 856, 313, 809, 825, 282, 778, 794, 251, + 747, 763, 220, 716, 732, 189, 685, 701, + 158, 654, 670, 127, 623, 639, 159, 655, + 671, 190, 686, 702, 221, 717, 733, 252, + 748, 764, 283, 779, 795, 314, 810, 826, + 345, 841, 857, 376, 872, 888, 407, 903, + 919, 438, 934, 950, 469, 965, 981, 500, + 996, 1012, 501, 997, 1013, 470, 966, 982, + 439, 935, 951, 408, 904, 920, 377, 873, + 889, 346, 842, 858, 315, 811, 827, 284, + 780, 796, 253, 749, 765, 222, 718, 734, + 191, 687, 703, 223, 719, 735, 254, 750, + 766, 285, 781, 797, 316, 812, 828, 347, + 843, 859, 378, 874, 890, 409, 905, 921, + 440, 936, 952, 471, 967, 983, 502, 998, + 1014, 503, 999, 1015, 472, 968, 984, 441, + 937, 953, 410, 906, 922, 379, 875, 891, + 348, 844, 860, 317, 813, 829, 286, 782, + 798, 255, 751, 767, 287, 783, 799, 318, + 814, 830, 349, 845, 861, 380, 876, 892, + 
411, 907, 923, 442, 938, 954, 473, 969, + 985, 504, 1000, 1016, 505, 1001, 1017, 474, + 970, 986, 443, 939, 955, 412, 908, 924, + 381, 877, 893, 350, 846, 862, 319, 815, + 831, 351, 847, 863, 382, 878, 894, 413, + 909, 925, 444, 940, 956, 475, 971, 987, + 506, 1002, 1018, 507, 1003, 1019, 476, 972, + 988, 445, 941, 957, 414, 910, 926, 383, + 879, 895, 415, 911, 927, 446, 942, 958, + 477, 973, 989, 508, 1004, 1020, 509, 1005, + 1021, 478, 974, 990, 447, 943, 959, 479, + 975, 991, 510, 1006, 1022, 511, 1007, 1023, +}; + +#elif DWTDCT_TYPE == DWTDCT16X16 + DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, 6, 6, @@ -351,7 +560,206 @@ DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { 975, 991, 510, 1006, 1022, 511, 1007, 1023, }; -#else // CONFIG_DWTDCTHYBRID +#elif DWTDCT_TYPE == DWTDCT8X8 + +DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { + 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + + 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = { + 0, 1, 32, 64, 33, 2, 3, 34, + 65, 96, 128, 97, 66, 35, 4, 5, + 36, 67, 98, 129, 160, 192, 161, 130, + 99, 68, 37, 6, 7, 38, 69, 100, + 131, 162, 193, 224, 225, 194, 163, 132, + 101, 70, 39, 71, 102, 133, 164, 195, + 226, 227, 196, 165, 134, 103, 135, 166, + 197, 228, 229, 198, 167, 199, 230, 231, + + 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328, + 41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298, + 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361, + 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269, + 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393, + 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394, + 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270, + 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364, + 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488, + 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396, + 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335, + 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459, + 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429, + 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430, + 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462, + 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 487, 495, + + 16, 512, 528, 17, 513, 529, 18, 514, + 530, 19, 515, 531, 20, 516, 532, 21, + 517, 533, 22, 518, 534, 23, 519, 535, + 24, 520, 536, 25, 521, 537, 26, 522, + 538, 27, 523, 539, 28, 524, 540, 29, + 525, 541, 30, 526, 542, 31, 527, 543, + 48, 544, 560, 49, 545, 561, 50, 546, + 562, 51, 547, 563, 52, 548, 564, 53, + 549, 565, 54, 550, 566, 55, 551, 567, + 56, 552, 568, 57, 553, 569, 58, 554, + 570, 59, 555, 571, 60, 556, 572, 61, + 557, 573, 62, 558, 574, 63, 559, 575, + 80, 576, 592, 81, 577, 593, 82, 578, + 594, 83, 579, 595, 84, 580, 596, 85, + 581, 597, 86, 582, 598, 87, 583, 599, + 88, 584, 600, 89, 585, 601, 90, 586, + 602, 91, 587, 603, 92, 588, 604, 93, + 589, 605, 94, 590, 606, 95, 591, 607, + 112, 608, 624, 113, 609, 625, 114, 610, + 626, 115, 611, 627, 116, 612, 628, 117, + 613, 629, 118, 614, 630, 119, 615, 631, + 120, 616, 632, 121, 617, 633, 122, 618, + 634, 123, 619, 635, 124, 620, 636, 125, + 621, 637, 126, 622, 638, 127, 623, 639, + 144, 640, 656, 145, 641, 657, 146, 642, + 658, 147, 643, 659, 148, 644, 660, 149, + 645, 661, 150, 646, 662, 151, 647, 663, + 152, 648, 664, 153, 649, 665, 154, 650, + 666, 155, 651, 667, 156, 652, 668, 157, + 653, 669, 158, 654, 670, 159, 655, 671, + 176, 672, 688, 177, 673, 689, 178, 674, + 690, 179, 675, 691, 180, 676, 692, 181, + 677, 693, 182, 678, 694, 183, 679, 695, + 184, 680, 696, 185, 681, 697, 186, 682, + 698, 187, 683, 699, 188, 684, 700, 189, + 685, 701, 190, 686, 702, 191, 687, 703, + 208, 704, 720, 209, 705, 721, 210, 706, + 722, 211, 707, 723, 212, 708, 724, 213, + 709, 725, 214, 710, 726, 215, 711, 727, + 216, 712, 728, 
217, 713, 729, 218, 714, + 730, 219, 715, 731, 220, 716, 732, 221, + 717, 733, 222, 718, 734, 223, 719, 735, + 240, 736, 752, 241, 737, 753, 242, 738, + 754, 243, 739, 755, 244, 740, 756, 245, + 741, 757, 246, 742, 758, 247, 743, 759, + 248, 744, 760, 249, 745, 761, 250, 746, + 762, 251, 747, 763, 252, 748, 764, 253, + 749, 765, 254, 750, 766, 255, 751, 767, + 272, 768, 784, 273, 769, 785, 274, 770, + 786, 275, 771, 787, 276, 772, 788, 277, + 773, 789, 278, 774, 790, 279, 775, 791, + 280, 776, 792, 281, 777, 793, 282, 778, + 794, 283, 779, 795, 284, 780, 796, 285, + 781, 797, 286, 782, 798, 287, 783, 799, + 304, 800, 816, 305, 801, 817, 306, 802, + 818, 307, 803, 819, 308, 804, 820, 309, + 805, 821, 310, 806, 822, 311, 807, 823, + 312, 808, 824, 313, 809, 825, 314, 810, + 826, 315, 811, 827, 316, 812, 828, 317, + 813, 829, 318, 814, 830, 319, 815, 831, + 336, 832, 848, 337, 833, 849, 338, 834, + 850, 339, 835, 851, 340, 836, 852, 341, + 837, 853, 342, 838, 854, 343, 839, 855, + 344, 840, 856, 345, 841, 857, 346, 842, + 858, 347, 843, 859, 348, 844, 860, 349, + 845, 861, 350, 846, 862, 351, 847, 863, + 368, 864, 880, 369, 865, 881, 370, 866, + 882, 371, 867, 883, 372, 868, 884, 373, + 869, 885, 374, 870, 886, 375, 871, 887, + 376, 872, 888, 377, 873, 889, 378, 874, + 890, 379, 875, 891, 380, 876, 892, 381, + 877, 893, 382, 878, 894, 383, 879, 895, + 400, 896, 912, 401, 897, 913, 402, 898, + 914, 403, 899, 915, 404, 900, 916, 405, + 901, 917, 406, 902, 918, 407, 903, 919, + 408, 904, 920, 409, 905, 921, 410, 906, + 922, 411, 907, 923, 412, 908, 924, 413, + 909, 925, 414, 910, 926, 415, 911, 927, + 432, 928, 944, 433, 929, 945, 434, 930, + 946, 435, 931, 947, 436, 932, 948, 437, + 933, 949, 438, 934, 950, 439, 935, 951, + 440, 936, 952, 441, 937, 953, 442, 938, + 954, 443, 939, 955, 444, 940, 956, 445, + 941, 957, 446, 942, 958, 447, 943, 959, + 464, 960, 976, 465, 961, 977, 466, 962, + 978, 467, 963, 979, 468, 964, 980, 469, + 965, 981, 470, 966, 982, 471, 967, 983, + 472, 968, 984, 473, 969, 985, 474, 970, + 986, 475, 971, 987, 476, 972, 988, 477, + 973, 989, 478, 974, 990, 479, 975, 991, + 496, 992, 1008, 497, 993, 1009, 498, 994, + 1010, 499, 995, 1011, 500, 996, 1012, 501, + 997, 1013, 502, 998, 1014, 503, 999, 1015, + 504, 1000, 1016, 505, 1001, 1017, 506, 1002, + 1018, 507, 1003, 1019, 508, 1004, 1020, 509, + 1005, 1021, 510, 1006, 1022, 511, 1007, 1023, +}; +#endif + +#else DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = { 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6, diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 0e6a6447c..106ef9c19 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -1536,6 +1536,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { #if !CONFIG_DWTDCTHYBRID #define DownshiftMultiplyBy2(x) x * 2 #define DownshiftMultiply(x) x + static void idct16(double *input, double *output, int stride) { static const double C1 = 0.995184726672197; static const double C2 = 0.98078528040323; @@ -1738,6 +1739,7 @@ static void idct16(double *input, double *output, int stride) { output[stride*9] = step[6] - step[ 9]; output[stride*8] = step[7] - step[ 8]; } + static void butterfly_32_idct_1d(double *input, double *output, int stride) { static const double C1 = 0.998795456205; // cos(pi * 1 / 64) static const double C3 = 0.989176509965; // cos(pi * 3 / 64) @@ -1878,12 +1880,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { vp9_clear_system_state(); // Make it simd safe : 
__asm emms; } -#else // CONFIG_DWTDCTHYBRID - -#define DWT_MAX_LENGTH 32 -#define DWT_TYPE 26 // 26/53/97 -#define DWT_PRECISION_BITS 2 -#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) +#else // !CONFIG_DWTDCTHYBRID #if DWT_TYPE == 53 @@ -2388,6 +2385,72 @@ static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch, vp9_clear_system_state(); // Make it simd safe : __asm emms; } +static void idct8_1d(double *x) { + int i, j; + double t[8]; + static const double idctmat[64] = { + 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127, + 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064, + 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064, + -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098, + 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162, + -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127, + 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098, + 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162, + 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098, + 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162, + 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161, + -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127, + 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065, + -0.35355339059327, 0.49039264020162, -0.46193976625564, 0.2777851165098, + 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127, + 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064 + }; + for (i = 0; i < 8; ++i) { + t[i] = 0; + for (j = 0; j < 8; ++j) + t[i] += idctmat[i * 8 + j] * x[j]; + } + for (i = 0; i < 8; ++i) { + x[i] = t[i]; + } +} + +static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch, + int scale) { + double X[8 * 8], Y[8]; + int i, j; + int shortpitch = pitch >> 1; + + vp9_clear_system_state(); // Make it simd safe : __asm emms; + { + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + X[i * 8 + j] = (double)coefs[i * shortpitch + j]; + } + } + for (i = 0; i < 8; i++) + idct8_1d(X + 8 * i); + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; ++j) + Y[j] = X[i + 8 * j]; + idct8_1d(Y); + for (j = 0; j < 8; ++j) + X[i + 8 * j] = Y[j]; + } + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale)); + } + } + } + vp9_clear_system_state(); // Make it simd safe : __asm emms; +} + +#define multiply_bits(d, n) ((n) < 0 ? 
(d) >> (n) : (d) << (n)) + +#if DWTDCT_TYPE == DWTDCT16X16_LEAN + void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { // assume output is a 32x32 buffer // Temporary buffer to hold a 16x16 block for 16x16 inverse dct @@ -2396,7 +2459,47 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { int16_t buffer2[32 * 32]; // Note: pitch is in bytes, short_pitch is in short units const int short_pitch = pitch >> 1; - int i; + int i, j; + + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the idct16x16 function + vp9_short_idct16x16_c_f(input, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) { + vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16); + } + for (i = 0; i < 16; ++i) { + for (j = 16; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } + for (i = 16; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } +#if DWT_TYPE == 26 + dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 97 + dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 53 + dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32); +#endif +} + +#elif DWTDCT_TYPE == DWTDCT16X16 + +void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { + // assume output is a 32x32 buffer + // Temporary buffer to hold a 16x16 block for 16x16 inverse dct + int16_t buffer[16 * 16]; + // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt + int16_t buffer2[32 * 32]; + // Note: pitch is in bytes, short_pitch is in short units + const int short_pitch = pitch >> 1; + int i, j; // TODO(debargha): Implement more efficiently by adding output pitch // argument to the idct16x16 function @@ -2431,6 +2534,66 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { #endif } +#elif DWTDCT_TYPE == DWTDCT8X8 + +void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { + // assume output is a 32x32 buffer + // Temporary buffer to hold a 16x16 block for 16x16 inverse dct + int16_t buffer[8 * 8]; + // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt + int16_t buffer2[32 * 32]; + // Note: pitch is in bytes, short_pitch is in short units + const int short_pitch = pitch >> 1; + int i, j; + + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the idct16x16 function + vp9_short_idct8x8_c_f(input, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8); + } + vp9_short_idct8x8_c_f(input + 8, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8); + } + vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8, + sizeof(*buffer2) * 8); + } + vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch, + 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) { + vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8, + sizeof(*buffer2) * 8); + } + for (i = 0; i < 16; ++i) { + for (j = 16; j < 32; ++j) { + buffer2[i * 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } + for (i = 16; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + buffer2[i 
* 32 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2); + } + } +#if DWT_TYPE == 26 + dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 97 + dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32); +#elif DWT_TYPE == 53 + dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32); +#endif +} + +#endif + +#if CONFIG_TX64X64 void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { // assume output is a 64x64 buffer // Temporary buffer to hold a 16x16 block for 16x16 inverse dct @@ -2448,6 +2611,20 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { for (i = 0; i < 16; ++i) { vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16); } +#if DWTDCT_TYPE == DWTDCT16X16_LEAN + for (i = 0; i < 16; ++i) { + for (j = 16; j < 64; ++j) { + buffer2[i * 64 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); + } + } + for (i = 16; i < 64; ++i) { + for (j = 0; j < 64; ++j) { + buffer2[i * 64 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); + } + } +#elif DWTDCT_TYPE == DWTDCT16X16 vp9_short_idct16x16_c_f(input + 16, buffer, pitch, 2 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) { @@ -2467,33 +2644,19 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { } // Copying and scaling highest bands into buffer2 -#if DWT_PRECISION_BITS < 1 for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 64 + 32 + j] = - input[i * short_pitch + 32 + j] >> (1 - DWT_PRECISION_BITS); + for (j = 32; j < 64; ++j) { + buffer2[i * 64 + j] = + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); } } - for (i = 0; i < 32; ++i) { + for (i = 32; i < 64; ++i) { for (j = 0; j < 64; ++j) { buffer2[i * 64 + j] = - input[(i + 32) * short_pitch + j] >> (1 - DWT_PRECISION_BITS); + multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1); } } -#else - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - buffer2[i * 64 + 32 + j] = - input[i * short_pitch + 32 + j] << (DWT_PRECISION_BITS - 1); - } - } - for (i = 0; i < 32; ++i) { - for (j = 0; j < 64; ++j) { - buffer2[i * 64 + j] = - input[(i + 32) * short_pitch + j] << (DWT_PRECISION_BITS - 1); - } - } -#endif +#endif // DWTDCT_TYPE #if DWT_TYPE == 26 dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64); @@ -2503,4 +2666,5 @@ void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) { dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64); #endif } -#endif // CONFIG_DWTDCTHYBRID +#endif // CONFIG_TX64X64 +#endif // !CONFIG_DWTDCTHYBRID diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 46d8ca315..9f7673061 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -1686,11 +1686,6 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) { #else // CONFIG_DWTDCTHYBRID -#define DWT_MAX_LENGTH 64 -#define DWT_TYPE 26 // 26/53/97 -#define DWT_PRECISION_BITS 2 -#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2) - #if DWT_TYPE == 53 // Note: block length must be even for this implementation @@ -2139,10 +2134,97 @@ static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, vp9_clear_system_state(); // Make it simd safe : __asm emms; } +void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) { + int j1, i, j, k; + static int count = 0; + short x[8 * 8]; + float b[8]; + float b1[8]; + float d[8][8]; + float f0 = (float) .7071068; + float f1 = (float) .4903926; + float f2 = (float) .4619398; 
+ float f3 = (float) .4157348; + float f4 = (float) .3535534; + float f5 = (float) .2777851; + float f6 = (float) .1913417; + float f7 = (float) .0975452; + pitch = pitch / 2; + for (i = 0, k = 0; i < 8; i++, k += pitch) { + for (j = 0; j < 8; j++) { + b[j] = (float)(block[k + j] << (3 - scale)); + } + /* Horizontal transform */ + for (j = 0; j < 4; j++) { + j1 = 7 - j; + b1[j] = b[j] + b[j1]; + b1[j1] = b[j] - b[j1]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[i][0] = (b[0] + b[1]) * f4; + d[i][4] = (b[0] - b[1]) * f4; + d[i][2] = b[2] * f6 + b[3] * f2; + d[i][6] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[i][1] = b1[4] * f7 + b1[7] * f1; + d[i][5] = b1[5] * f3 + b1[6] * f5; + d[i][7] = b1[7] * f7 - b1[4] * f1; + d[i][3] = b1[6] * f3 - b1[5] * f5; + } + /* Vertical transform */ + for (i = 0; i < 8; i++) { + for (j = 0; j < 4; j++) { + j1 = 7 - j; + b1[j] = d[j][i] + d[j1][i]; + b1[j1] = d[j][i] - d[j1][i]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[0][i] = (b[0] + b[1]) * f4; + d[4][i] = (b[0] - b[1]) * f4; + d[2][i] = b[2] * f6 + b[3] * f2; + d[6][i] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[1][i] = b1[4] * f7 + b1[7] * f1; + d[5][i] = b1[5] * f3 + b1[6] * f5; + d[7][i] = b1[7] * f7 - b1[4] * f1; + d[3][i] = b1[6] * f3 - b1[5] * f5; + } + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + *(coefs + j + i * 8) = (short) floor(d[i][j] + 0.5); + } + } + return; +} + +#define divide_bits(d, n) ((n) < 0 ? 
(d) << (n) : (d) >> (n)) + +#if DWTDCT_TYPE == DWTDCT16X16_LEAN + void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { // assume out is a 32x32 buffer short buffer[16 * 16]; - int i; + int i, j; const int short_pitch = pitch >> 1; #if DWT_TYPE == 26 dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32); @@ -2156,7 +2238,37 @@ void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); + for (i = 0; i < 16; ++i) { + for (j = 16; j < 32; ++j) { + out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); + } + } + for (i = 16; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); + } + } +} +#elif DWTDCT_TYPE == DWTDCT16X16 + +void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { + // assume out is a 32x32 buffer + short buffer[16 * 16]; + int i, j; + const int short_pitch = pitch >> 1; +#if DWT_TYPE == 26 + dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32); +#elif DWT_TYPE == 97 + dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32); +#elif DWT_TYPE == 53 + dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32); +#endif + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the dct16x16 function + vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); + for (i = 0; i < 16; ++i) + vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16); vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16); @@ -2170,6 +2282,52 @@ void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16); } +#elif DWTDCT_TYPE == DWTDCT8X8 + +void vp9_short_fdct32x32_c(short *input, short *out, int pitch) { + // assume out is a 32x32 buffer + short buffer[8 * 8]; + int i, j; + const int short_pitch = pitch >> 1; +#if DWT_TYPE == 26 + dyadic_analyze_26(2, 32, 32, input, short_pitch, out, 32); +#elif DWT_TYPE == 97 + dyadic_analyze_97(2, 32, 32, input, short_pitch, out, 32); +#elif DWT_TYPE == 53 + dyadic_analyze_53(2, 32, 32, input, short_pitch, out, 32); +#endif + // TODO(debargha): Implement more efficiently by adding output pitch + // argument to the dct16x16 function + vp9_short_fdct8x8_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) + vpx_memcpy(out + i * 32, buffer + i * 8, sizeof(short) * 8); + + vp9_short_fdct8x8_c_f(out + 8, buffer, 64, 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) + vpx_memcpy(out + i * 32 + 8, buffer + i * 8, sizeof(short) * 8); + + vp9_short_fdct8x8_c_f(out + 32 * 8, buffer, 64, 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) + vpx_memcpy(out + i * 32 + 32 * 8, buffer + i * 8, sizeof(short) * 8); + + vp9_short_fdct8x8_c_f(out + 33 * 8, buffer, 64, 1 + DWT_PRECISION_BITS); + for (i = 0; i < 8; ++i) + vpx_memcpy(out + i * 32 + 33 * 8, buffer + i * 8, sizeof(short) * 8); + + for (i = 0; i < 16; ++i) { + for (j = 16; j < 32; ++j) { + out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); + } + } + for (i = 16; i < 32; ++i) { + for (j = 0; j < 32; ++j) { + out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2); + } + } +} + +#endif + #if CONFIG_TX64X64 void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { // assume 
out is a 64x64 buffer @@ -2189,6 +2347,18 @@ void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16); +#if DWTDCT_TYPE == DWTDCT16X16_LEAN + for (i = 0; i < 16; ++i) { + for (j = 16; j < 48; ++j) { + out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); + } + } + for (i = 16; i < 64; ++i) { + for (j = 0; j < 64; ++j) { + out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); + } + } +#elif DWTDCT_TYPE == DWTDCT16X16 vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS); for (i = 0; i < 16; ++i) vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16); @@ -2204,29 +2374,17 @@ void vp9_short_fdct64x64_c(short *input, short *out, int pitch) { // There is no dct used on the highest bands for now. // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS // TODO(debargha): experiment with turning these coeffs to 0 -#if DWT_PRECISION_BITS < 1 for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - out[i * 64 + 32 + j] <<= (1 - DWT_PRECISION_BITS); + for (j = 32; j < 64; ++j) { + out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); } } - for (i = 0; i < 32; ++i) { + for (i = 32; i < 64; ++i) { for (j = 0; j < 64; ++j) { - out[i * 64 + j] <<= (1 - DWT_PRECISION_BITS); + out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1); } } -#else - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) { - out[i * 64 + 32 + j] >>= (DWT_PRECISION_BITS - 1); - } - } - for (i = 0; i < 32; ++i) { - for (j = 0; j < 64; ++j) { - out[i * 64 + j] >>= (DWT_PRECISION_BITS - 1); - } - } -#endif +#endif // DWTDCT_TYPE } #endif // CONFIG_TX64X64 #endif // CONFIG_DWTDCTHYBRID
From 76ac5b39375203ebc0111a5907abea40678fedf0 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Mon, 14 Jan 2013 12:10:11 -0800 Subject: [PATCH 67/77] Fix unused variable warnings The previous commit does not build cleanly on Jenkins with the DWT/DCT hybrid experiment enabled (--enable-dwtdcthybrid). Change-Id: Ia67e8f59d17ef2d5200ec6b90dfe6711ed6835a5 --- vp9/encoder/vp9_dct.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 9f7673061..bfde02ccb 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -2136,8 +2136,6 @@ static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) { int j1, i, j, k; - static int count = 0; - short x[8 * 8]; float b[8]; float b1[8]; float d[8][8];
From 290b83ab627365ac697b477a8d0cfa8cbf6b90db Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 14 Jan 2013 12:43:12 -0800 Subject: [PATCH 68/77] Reset x->skip for each iteration in the RD loop. This prevents ill-defined behaviour, such as setting x->skip for a mode that is excluded because of frame-level flags (e.g. filter selection, compound prediction selection), then not breaking out of the RD loop because the mode is not allowed, but keeping the flag on. Whatever mode is iterated through next in the RD loop will then carry this flag, and all sorts of bad things happen, such as x->skip being set on intra pred modes.
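In toy form (a standalone sketch with hypothetical helpers, not the actual vp9_rdopt.c code), the failure mode and the fix look like this:

    #include <stdio.h>

    /* Hypothetical stand-ins: mode 1 sets the skip flag during
       evaluation but is then excluded by a frame-level flag. */
    static int sets_skip(int mode) { return mode == 1; }
    static int mode_allowed(int mode) { return mode != 1; }

    int main(void) {
      int skip = 0;   /* plays the role of x->skip */
      for (int mode = 0; mode < 3; ++mode) {
        skip = 0;     /* the fix: reset per iteration, not once before the loop */
        if (sets_skip(mode))
          skip = 1;   /* evaluation sets the flag... */
        if (!mode_allowed(mode))
          continue;   /* ...then the mode is rejected; without the in-loop
                         reset, the stale flag would leak to the next mode */
        printf("mode %d: skip=%d\n", mode, skip);
      }
      return 0;
    }

With the reset hoisted back above the loop (the old placement), mode 2 would report skip=1 here, which is the misbehaviour this patch removes.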
Change-Id: I5bec46b36e38292174acb1c564b3caf00a9b4b9a --- vp9/encoder/vp9_rdopt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a82670b6e..b73095863 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3765,8 +3765,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *returnintra = LLONG_MAX; - x->skip = 0; - mbmi->ref_frame = INTRA_FRAME; /* Initialize zbin mode boost for uv costing */ @@ -3809,6 +3807,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rate_y = 0; rate_uv = 0; + x->skip = 0; + this_mode = vp9_mode_order[mode_index].mode; mbmi->mode = this_mode; mbmi->uv_mode = DC_PRED; From 7bcaac3e64c38de7112e6c05e3e7f19198cb9ae4 Mon Sep 17 00:00:00 2001 From: Adrian Grange Date: Mon, 14 Jan 2013 12:57:07 -0800 Subject: [PATCH 69/77] Merge prediction filter Removed the experimental flag from around the prediction filter. Change-Id: Ic1dd2db8fe8ac17ed5129f83094d4c5cdd5527d2 --- configure | 1 - vp9/common/vp9_entropymode.h | 4 ---- vp9/encoder/vp9_onyx_if.c | 4 ---- 3 files changed, 9 deletions(-) diff --git a/configure b/configure index 5506aec45..cc43b9713 100755 --- a/configure +++ b/configure @@ -239,7 +239,6 @@ HAVE_LIST=" EXPERIMENT_LIST=" csm comp_intra_pred - pred_filter lossless subpelrefmv new_mvref diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 439fb3f43..01e5856f7 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -97,11 +97,7 @@ void vp9_kf_default_bmode_probs(vp9_prob dest[VP9_KF_BINTRAMODES] void vp9_adapt_mode_probs(struct VP9Common *); -#if CONFIG_PRED_FILTER #define VP9_SWITCHABLE_FILTERS 3 /* number of switchable filters */ -#else -#define VP9_SWITCHABLE_FILTERS 2 /* number of switchable filters */ -#endif extern const INTERPOLATIONFILTERTYPE vp9_switchable_interp [VP9_SWITCHABLE_FILTERS]; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 90f350004..b767ff0b3 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -49,11 +49,7 @@ extern void print_tree_update_probs(); static void set_default_lf_deltas(VP9_COMP *cpi); -#if CONFIG_PRED_FILTER #define DEFAULT_INTERP_FILTER SWITCHABLE -#else -#define DEFAULT_INTERP_FILTER EIGHTTAP -#endif #define SEARCH_BEST_FILTER 0 /* to search exhaustively for best filter */ From 113005b11dd4e0e31bf22756d89ccd823078db10 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 14 Jan 2013 13:56:52 -0800 Subject: [PATCH 70/77] Fix compiler warnings The warnings caused verification failures in Gerrit for several commits. Change-Id: I030df8638bd69b8783a3ac58e720ff9f0bfd546c --- vp9/encoder/vp9_dct.c | 2 -- vp9/encoder/vp9_encodeframe.c | 9 --------- vp9/encoder/vp9_rdopt.c | 17 +++++------------ 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 9f7673061..bfde02ccb 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -2136,8 +2136,6 @@ static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch, void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) { int j1, i, j, k; - static int count = 0; - short x[8 * 8]; float b[8]; float b1[8]; float d[8][8]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3ae8bd6d6..3f5133062 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2266,7 +2266,6 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, uint8_t
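/* Patch 69 above makes the three-entry switchable-filter set unconditional
 * and defaults the encoder to per-block filter selection. A sketch of what
 * the merged constants express; the contents of vp9_switchable_interp[] are
 * an assumption, since the hunks only show the count and the default: */
#define VP9_SWITCHABLE_FILTERS 3          /* number of switchable filters */
#define DEFAULT_INTERP_FILTER SWITCHABLE  /* RD-search a filter per block */
/* assumed table: vp9_switchable_interp[VP9_SWITCHABLE_FILTERS] =
 *   { EIGHTTAP, EIGHTTAP_SMOOTH, EIGHTTAP_SHARP };                       */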
*vdst = xd->dst.v_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - int seg_ref_active; unsigned char ref_pred_flag; int n; TOKENEXTRA *tp[4]; @@ -2308,10 +2307,7 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, vp9_update_zbin_extra(cpi, x); - seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); - // SET VARIOUS PREDICTION FLAGS - // Did the chosen reference frame match its predicted value. ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == vp9_get_pred_ref(cm, xd))); @@ -2502,7 +2498,6 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, uint8_t *vdst = xd->dst.v_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - int seg_ref_active; unsigned char ref_pred_flag; int n; TOKENEXTRA *tp[16]; @@ -2545,10 +2540,6 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, vp9_update_zbin_extra(cpi, x); - seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); - - // SET VARIOUS PREDICTION FLAGS - // Did the chosen reference frame match its predicted value. ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == vp9_get_pred_ref(cm, xd))); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a82670b6e..fe2c0f3a7 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -4464,14 +4464,13 @@ void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; int rate_y, rate_uv; int rate_y_tokenonly, rate_uv_tokenonly; - int error_y, error_uv; int dist_y, dist_uv; int y_skip, uv_skip; int64_t txfm_cache[NB_TXFM_MODES]; - error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, + rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, txfm_cache); - error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip); if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { @@ -4493,14 +4492,13 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; int rate_y, rate_uv; int rate_y_tokenonly, rate_uv_tokenonly; - int error_y, error_uv; int dist_y, dist_uv; int y_skip, uv_skip; int64_t txfm_cache[NB_TXFM_MODES]; - error_y = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, + rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, txfm_cache); - error_uv = rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, + rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip); if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) { @@ -4602,6 +4600,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, rate = rateuv8x8 + rate16x16 - rateuv8x8_tokenonly - rate16x16_tokenonly + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); dist = dist16x16 + (distuv8x8 >> 2); + mbmi->txfm_size = txfm_size_16x16; memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0, sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff)); @@ -4697,7 +4696,6 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; int64_t best_rd = LLONG_MAX; - int64_t best_yrd = LLONG_MAX; int64_t best_txfm_rd[NB_TXFM_MODES]; int64_t best_txfm_diff[NB_TXFM_MODES]; int64_t 
best_pred_diff[NB_PREDICTION_TYPES]; @@ -5071,11 +5069,6 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame]; - - /* Calculate the final y RD estimate for this mode */ - best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost), - (distortion2 - distortion_uv)); - *returnrate = rate2; *returndistortion = distortion2; best_rd = this_rd; From fdf8654189720d3de06095167885f9f1a15ab958 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Thu, 10 Jan 2013 15:38:29 -0800 Subject: [PATCH 71/77] change to evaluate reference mvs using above only Change-Id: Ibcc342efac0a9be7a21d9b2c09984d9e16bbb225 --- configure | 1 + vp9/common/vp9_findnearmv.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 5506aec45..865e8c29d 100755 --- a/configure +++ b/configure @@ -252,6 +252,7 @@ EXPERIMENT_LIST=" newcoefcontext enable_6tap widerlpf + abovesprefmv " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 92e0a0603..52b30eff2 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -131,9 +131,11 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int_mv *near) { int i, j; uint8_t *above_src; - uint8_t *left_src; uint8_t *above_ref; +#if !CONFIG_ABOVESPREFMV + uint8_t *left_src; uint8_t *left_ref; +#endif unsigned int score; #if CONFIG_SUBPELREFMV unsigned int sse; @@ -148,14 +150,24 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, #if CONFIG_SUBPELREFMV above_src = xd->dst.y_buffer - xd->dst.y_stride * 2; - left_src = xd->dst.y_buffer - 2; above_ref = ref_y_buffer - ref_y_stride * 2; +#if CONFIG_ABOVESPREFMV + above_src -= 4; + above_ref -= 4; +#else + left_src = xd->dst.y_buffer - 2; left_ref = ref_y_buffer - 2; +#endif #else above_src = xd->dst.y_buffer - xd->dst.y_stride * 3; - left_src = xd->dst.y_buffer - 3; above_ref = ref_y_buffer - ref_y_stride * 3; +#if CONFIG_ABOVESPREFMV + above_src -= 4; + above_ref -= 4; +#else + left_src = xd->dst.y_buffer - 3; left_ref = ref_y_buffer - 3; +#endif #endif // Limit search to the predicted best few candidates @@ -173,11 +185,19 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, zero_seen = zero_seen || !this_mv.as_int; +#if !CONFIG_ABOVESPREFMV clamp_mv(&this_mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN + 24, xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); +#else + clamp_mv(&this_mv, + xd->mb_to_left_edge - LEFT_TOP_MARGIN + 32, + xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, + xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24, + xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); +#endif #if CONFIG_SUBPELREFMV row_offset = this_mv.as_mv.row >> 3; @@ -213,6 +233,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += sse; } } +#if !CONFIG_ABOVESPREFMV if (xd->left_available) { vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride, SP(this_mv.as_mv.col), @@ -245,6 +266,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += sse; } } +#endif #else row_offset = (this_mv.as_mv.row > 0) ? 
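/* The abovesprefmv experiment added above scores each candidate reference MV
 * by matching reconstructed border pixels against the reference frame using
 * only the rows above the block (extended 4 pixels to the left), instead of
 * above rows plus a left column. A condensed whole-pel sketch of the idea;
 * the helper name is hypothetical and the real code also applies sub-pel
 * interpolation via vp9_sub_pixel_variance16x2: */
static unsigned int score_mv_above_only(const uint8_t *above_src, int src_stride,
                                        const uint8_t *above_ref, int ref_stride,
                                        int mv_row, int mv_col, int width) {
  const int offset = (mv_row >> 3) * ref_stride + (mv_col >> 3);  /* whole pel */
  unsigned int score = 0;
  int i, j;
  for (i = 0; i < 2; ++i)            /* two border rows above the block */
    for (j = -4; j < width; ++j) {   /* start 4 pixels left of the block */
      const int d = above_src[i * src_stride + j] -
                    above_ref[offset + i * ref_stride + j];
      score += (unsigned int)(d * d);
    }
  return score;  /* lower is better; candidates are then sorted by score */
}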
((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); @@ -266,6 +288,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_ref + offset + 48, ref_y_stride); } } +#if !CONFIG_ABOVESPREFMV if (xd->left_available) { score += vp9_sad3x16(left_src, xd->dst.y_stride, left_ref + offset, ref_y_stride); @@ -286,6 +309,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, ref_y_stride); } } +#endif #endif // Add the entry to our list and then resort the list on score. ref_scores[i] = score; From ad9a16ed179913bc671b54f498daa7f4256cc32a Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 14 Jan 2013 09:28:35 -0800 Subject: [PATCH 72/77] changed UV plane loop filtering for TX_8X8 In commit 9a1d73d, loop filtering was added for UV 4x4 boundaries when TX_8X8 is used by a MB. This commit further refines the decision to be based on the actual transform used for the UV planes. When the UV planes use the 4x4 transform, i.e. when the prediction mode used is either I8X8_PRED or SPLITMV, the UV planes are filtered on 4x4 boundaries, and no filtering is applied on 4x4 block boundaries when the UV planes use the 8x8 transform. Change-Id: Ibb404face0a1d129b4b4abaf67c55d82e8df8bec --- vp9/common/vp9_loopfilter.c | 38 +++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index a7973bea6..54658ebc5 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -232,11 +232,11 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, /* vp9_filter each macro block */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const MB_PREDICTION_MODE mode = mode_info_context->mbmi.mode; + const int mode_index = lfi_n->mode_lf_lut[mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; const int filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - if (filter_level) { const int skip_lf = mb_lf_skip(&mode_info_context->mbmi); const int tx_size = mode_info_context->mbmi.txfm_size; @@ -255,19 +255,24 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, #if CONFIG_WIDERLPF if (tx_size >= TX_16X16) vp9_lpf_mbv_w(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); + post->uv_stride, &lfi); else #endif vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); + post->uv_stride, &lfi); } if (!skip_lf) { - if (tx_size >= TX_8X8) - vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else + if (tx_size >= TX_8X8) { + if (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV)) + vp9_loop_filter_bv8x8(y_ptr, u_ptr, v_ptr, post->y_stride, + post->uv_stride, &lfi); + else + vp9_loop_filter_bv8x8(y_ptr, NULL, NULL, post->y_stride, + post->uv_stride, &lfi); + } else { vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); + } } /* don't apply across umv border */ @@ -279,19 +284,24 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, #if CONFIG_WIDERLPF if (tx_size >= TX_16X16) vp9_lpf_mbh_w(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); + post->uv_stride, &lfi); else #endif vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); + post->uv_stride, &lfi); } if (!skip_lf) { - if (tx_size >= TX_8X8) - vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride, - post->uv_stride, &lfi); - else + if (tx_size >= TX_8X8) { + if (tx_size == TX_8X8 &&
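/* Condensed form of the dispatch that this hunk (and its horizontal twin
 * below) implements: the UV planes only use a 4x4 transform when the
 * macroblock is coded as I8X8_PRED or SPLITMV, so only then are the interior
 * chroma edges filtered; passing NULL u/v pointers skips the chroma planes.
 * Same variables as the surrounding function; a sketch, not the patch text: */
if (tx_size >= TX_8X8) {
  const int uv_uses_4x4 =
      (tx_size == TX_8X8 && (mode == I8X8_PRED || mode == SPLITMV));
  vp9_loop_filter_bv8x8(y_ptr,
                        uv_uses_4x4 ? u_ptr : NULL,   /* chroma only when */
                        uv_uses_4x4 ? v_ptr : NULL,   /* UV is 4x4-coded  */
                        post->y_stride, post->uv_stride, &lfi);
} else {
  vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride,
                     post->uv_stride, &lfi);
}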
(mode == I8X8_PRED || mode == SPLITMV)) + vp9_loop_filter_bh8x8(y_ptr, u_ptr, v_ptr, post->y_stride, + post->uv_stride, &lfi); + else + vp9_loop_filter_bh8x8(y_ptr, NULL, NULL, post->y_stride, + post->uv_stride, &lfi); + } else { vp9_loop_filter_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); + } } } else { // FIXME: Not 8x8 aware From f7dab600961bbf1ec0475512f1d009b7b0634ebf Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 14 Jan 2013 12:11:06 -0800 Subject: [PATCH 73/77] Merge experiment "widerlpf" Change-Id: I0c94475075e66e13cfe4c20fab7db6474441ae86 --- configure | 1 - vp9/common/vp9_loopfilter.c | 4 ---- vp9/common/vp9_loopfilter_filters.c | 2 -- vp9/common/vp9_rtcd_defs.sh | 2 -- vp9/common/x86/vp9_loopfilter_x86.c | 13 +++++-------- 5 files changed, 5 insertions(+), 17 deletions(-) diff --git a/configure b/configure index 4f0ae55e1..b6cf2752e 100755 --- a/configure +++ b/configure @@ -250,7 +250,6 @@ EXPERIMENT_LIST=" cnvcontext newcoefcontext enable_6tap - widerlpf abovesprefmv " CONFIG_LIST=" diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 54658ebc5..7633887a3 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -252,12 +252,10 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, (sb_mb_lf_skip(mode_info_context - 1, mode_info_context) || tx_size >= TX_32X32)) ) { -#if CONFIG_WIDERLPF if (tx_size >= TX_16X16) vp9_lpf_mbv_w(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); else -#endif vp9_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); } @@ -281,12 +279,10 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, (sb_mb_lf_skip(mode_info_context - mis, mode_info_context) || tx_size >= TX_32X32)) ) { -#if CONFIG_WIDERLPF if (tx_size >= TX_16X16) vp9_lpf_mbh_w(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); else -#endif vp9_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); } diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index 1566abf9b..fbce50d05 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -481,7 +481,6 @@ void vp9_loop_filter_bvs_c(uint8_t *y_ptr, int y_stride, vp9_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit); } -#if CONFIG_WIDERLPF static __inline void wide_mbfilter(int8_t mask, uint8_t hev, uint8_t flat, uint8_t flat2, uint8_t *op7, uint8_t *op6, uint8_t *op5, @@ -720,4 +719,3 @@ void vp9_lpf_mbh_w_c(unsigned char *y_ptr, unsigned char *u_ptr, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } -#endif diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index d8517bbfa..fdffa2f6e 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -233,13 +233,11 @@ vp9_loop_filter_simple_bh_c=vp9_loop_filter_bhs_c vp9_loop_filter_simple_bh_mmx=vp9_loop_filter_bhs_mmx vp9_loop_filter_simple_bh_sse2=vp9_loop_filter_bhs_sse2 -if [ "$CONFIG_WIDERLPF" = "yes" ]; then prototype void vp9_lpf_mbh_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi" specialize vp9_lpf_mbh_w sse2 prototype void vp9_lpf_mbv_w "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi" specialize vp9_lpf_mbv_w sse2 -fi # # post proc diff --git a/vp9/common/x86/vp9_loopfilter_x86.c b/vp9/common/x86/vp9_loopfilter_x86.c index 19388c287..e73850dd9 100644 --- a/vp9/common/x86/vp9_loopfilter_x86.c +++ 
b/vp9/common/x86/vp9_loopfilter_x86.c @@ -86,7 +86,6 @@ void vp9_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, #if HAVE_SSE2 -#if CONFIG_WIDERLPF void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s, int p, const unsigned char *_blimit, @@ -559,7 +558,6 @@ void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s, } } } -#endif void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s, int p, @@ -1038,7 +1036,6 @@ void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, transpose(src, 16, dst, p, 2); } -#if CONFIG_WIDERLPF void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p, const unsigned char *blimit, @@ -1069,7 +1066,7 @@ void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, /* Transpose 16x16 */ transpose(src, 16, dst, p, 4); } -#endif + void vp9_mbloop_filter_vertical_edge_uv_sse2(unsigned char *u, int p, @@ -1113,7 +1110,7 @@ void vp9_loop_filter_mbh_sse2(unsigned char *y_ptr, lfi->lim, lfi->hev_thr, v_ptr); } -#if CONFIG_WIDERLPF + void vp9_lpf_mbh_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { @@ -1125,7 +1122,7 @@ void vp9_lpf_mbh_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, vp9_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); } -#endif + void vp9_loop_filter_bh8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, @@ -1152,7 +1149,7 @@ void vp9_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, lfi->lim, lfi->hev_thr, v_ptr); } -#if CONFIG_WIDERLPF + void vp9_lpf_mbv_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi) { @@ -1164,7 +1161,7 @@ void vp9_lpf_mbv_w_sse2(unsigned char *y_ptr, unsigned char *u_ptr, vp9_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); } -#endif + void vp9_loop_filter_bv8x8_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, From 741fbe96562ab91f617069e80e45ecb72a2349e2 Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 14 Jan 2013 14:39:54 -0800 Subject: [PATCH 74/77] Merge experiment "subpelrefmv" Change-Id: Iac7f3d108863552b850c92c727e00c95571c9e96 --- configure | 1 - vp9/common/vp9_findnearmv.c | 62 +------------------------------------ vp9/common/vp9_rtcd_defs.sh | 2 -- vp9/vp9_common.mk | 2 -- 4 files changed, 1 insertion(+), 66 deletions(-) diff --git a/configure b/configure index b6cf2752e..5f80c9a08 100755 --- a/configure +++ b/configure @@ -240,7 +240,6 @@ EXPERIMENT_LIST=" csm comp_intra_pred lossless - subpelrefmv new_mvref implicit_segmentation newbintramodes diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index 52b30eff2..41d18dbfb 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -55,7 +55,7 @@ unsigned int vp9_sad16x3_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3); } -#if CONFIG_SUBPELREFMV + unsigned int vp9_variance2x16_c(const uint8_t *src_ptr, const int source_stride, const uint8_t *ref_ptr, @@ -117,7 +117,6 @@ unsigned int vp9_sub_pixel_variance2x16_c(const uint8_t *src_ptr, return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); } -#endif /* check a list of motion vectors by sad score using a number rows of pixels * above and a number cols of pixels in the left to select the one with best @@ -137,9 +136,7 @@ void 
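/* Candidate reference MVs are stored in 1/8-pel units: the integer part
 * selects the border rows in the reference frame and the fractional part
 * feeds the sub-pel variance call. A sketch of the decomposition used in
 * vp9_find_best_ref_mvs() below (SP()'s exact definition is outside this
 * hunk, so the fractional mapping is an assumption): */
static void split_eighth_pel(int mv_component, int *whole, int *frac) {
  *whole = mv_component >> 3;  /* arithmetic shift: floor division by 8     */
  *frac = mv_component & 7;    /* 0..7 phase; mv == 8 * whole + frac holds  */
}                              /* even for negative components              */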
vp9_find_best_ref_mvs(MACROBLOCKD *xd, uint8_t *left_ref; #endif unsigned int score; -#if CONFIG_SUBPELREFMV unsigned int sse; -#endif unsigned int ref_scores[MAX_MV_REF_CANDIDATES] = {0}; int_mv sorted_mvs[MAX_MV_REF_CANDIDATES]; int zero_seen = FALSE; @@ -148,7 +145,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, nearest->as_int = near->as_int = 0; vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs)); -#if CONFIG_SUBPELREFMV above_src = xd->dst.y_buffer - xd->dst.y_stride * 2; above_ref = ref_y_buffer - ref_y_stride * 2; #if CONFIG_ABOVESPREFMV @@ -157,17 +153,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, #else left_src = xd->dst.y_buffer - 2; left_ref = ref_y_buffer - 2; -#endif -#else - above_src = xd->dst.y_buffer - xd->dst.y_stride * 3; - above_ref = ref_y_buffer - ref_y_stride * 3; -#if CONFIG_ABOVESPREFMV - above_src -= 4; - above_ref -= 4; -#else - left_src = xd->dst.y_buffer - 3; - left_ref = ref_y_buffer - 3; -#endif #endif // Limit search to the predicted best few candidates @@ -199,7 +184,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); #endif -#if CONFIG_SUBPELREFMV row_offset = this_mv.as_mv.row >> 3; col_offset = this_mv.as_mv.col >> 3; offset = ref_y_stride * row_offset + col_offset; @@ -266,50 +250,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += sse; } } -#endif -#else - row_offset = (this_mv.as_mv.row > 0) ? - ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); - col_offset = (this_mv.as_mv.col > 0) ? - ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3); - offset = ref_y_stride * row_offset + col_offset; - score = 0; - if (xd->up_available) { - score += vp9_sad16x3(above_src, xd->dst.y_stride, - above_ref + offset, ref_y_stride); - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { - score += vp9_sad16x3(above_src + 16, xd->dst.y_stride, - above_ref + offset + 16, ref_y_stride); - } - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { - score += vp9_sad16x3(above_src + 32, xd->dst.y_stride, - above_ref + offset + 32, ref_y_stride); - score += vp9_sad16x3(above_src + 48, xd->dst.y_stride, - above_ref + offset + 48, ref_y_stride); - } - } -#if !CONFIG_ABOVESPREFMV - if (xd->left_available) { - score += vp9_sad3x16(left_src, xd->dst.y_stride, - left_ref + offset, ref_y_stride); - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { - score += vp9_sad3x16(left_src + xd->dst.y_stride * 16, - xd->dst.y_stride, - left_ref + offset + ref_y_stride * 16, - ref_y_stride); - } - if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { - score += vp9_sad3x16(left_src + xd->dst.y_stride * 32, - xd->dst.y_stride, - left_ref + offset + ref_y_stride * 32, - ref_y_stride); - score += vp9_sad3x16(left_src + xd->dst.y_stride * 48, - xd->dst.y_stride, - left_ref + offset + ref_y_stride * 48, - ref_y_stride); - } - } -#endif #endif // Add the entry to our list and then resort the list on score. 
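/* The "resort" step keeps sorted_mvs[] / ref_scores[] ordered by ascending
 * score so that sorted_mvs[0] and sorted_mvs[1] become the nearest/near
 * candidates. A minimal sketch of that insertion; the array names come from
 * the code, but the loop shape is an assumption: */
static void insert_sorted(int_mv *sorted_mvs, unsigned int *ref_scores,
                          int count, int_mv mv, unsigned int score) {
  int k = count;                        /* new entry starts at the tail */
  while (k > 0 && score < ref_scores[k - 1]) {
    sorted_mvs[k] = sorted_mvs[k - 1];  /* shift worse entries down     */
    ref_scores[k] = ref_scores[k - 1];
    --k;
  }
  sorted_mvs[k] = mv;                   /* drop the candidate into place */
  ref_scores[k] = score;
}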
ref_scores[i] = score; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index fdffa2f6e..a510f005a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -278,10 +278,8 @@ specialize vp9_sad16x3 sse2 prototype unsigned int vp9_sad3x16 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride" specialize vp9_sad3x16 sse2 -if [ "$CONFIG_SUBPELREFMV" = "yes" ]; then prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" specialize vp9_sub_pixel_variance16x2 sse2 -fi # # Sub Pixel Filters diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 26ebed58e..a1c284a27 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -96,9 +96,7 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_wrapper_sse2.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm -ifeq ($(CONFIG_SUBPELREFMV),yes) VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_variance_sse2.c -endif VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_ssse3.asm ifeq ($(CONFIG_POSTPROC),yes) From c9071601a251d773e95eeb5f94aff919174de398 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 14 Jan 2013 14:37:53 -0800 Subject: [PATCH 75/77] Remove compound intra-intra experiment. This experiment gives little gain and adds considerable code complexity (and it hinders other experiments), so let's get rid of it. Change-Id: Id25e79a137a1b8a01138aa27a1fa0ba4a2df274a --- configure | 1 - vp9/common/vp9_blockd.h | 6 - vp9/common/vp9_debugmodes.c | 3 - vp9/common/vp9_entropymode.h | 3 - vp9/common/vp9_reconintra.c | 77 ----- vp9/common/vp9_reconintra4x4.c | 18 - vp9/common/vp9_rtcd_defs.sh | 15 - vp9/decoder/vp9_decodemv.c | 47 --- vp9/decoder/vp9_decodframe.c | 14 +- vp9/encoder/vp9_bitstream.c | 28 -- vp9/encoder/vp9_encodeintra.c | 71 +--- vp9/encoder/vp9_rdopt.c | 606 +++++++++++---------------------- 12 files changed, 206 insertions(+), 683 deletions(-) diff --git a/configure b/configure index 5f80c9a08..cb3c07e79 100755 --- a/configure +++ b/configure @@ -238,7 +238,6 @@ HAVE_LIST=" " EXPERIMENT_LIST=" csm - comp_intra_pred lossless new_mvref implicit_segmentation diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 18c2ae0a8..e838da221 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -215,9 +215,6 @@ union b_mode_info { struct { B_PREDICTION_MODE first; TX_TYPE tx_type; -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE second; -#endif #if CONFIG_NEWBINTRAMODES B_PREDICTION_MODE context; #endif @@ -245,9 +242,6 @@ typedef enum { typedef struct { MB_PREDICTION_MODE mode, uv_mode; -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE second_mode, second_uv_mode; -#endif #if CONFIG_COMP_INTERINTRA_PRED MB_PREDICTION_MODE interintra_mode, interintra_uv_mode; #endif diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c index 76318d2e1..5ea7736b7 100644 --- a/vp9/common/vp9_debugmodes.c +++ b/vp9/common/vp9_debugmodes.c @@ -87,9 +87,6 @@ void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, if (mi[mb_index].mbmi.mode == B_PRED) { fprintf(mvs, "%2d ",
mi[mb_index].bmi[bindex].as_mode.second); -#endif } else fprintf(mvs, "xx "); diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h index 01e5856f7..e03c6fe6d 100644 --- a/vp9/common/vp9_entropymode.h +++ b/vp9/common/vp9_entropymode.h @@ -16,9 +16,6 @@ #define SUBMVREF_COUNT 5 #define VP9_NUMMBSPLITS 4 -#if CONFIG_COMP_INTRA_PRED -#define DEFAULT_COMP_INTRA_PROB 32 -#endif #if CONFIG_COMP_INTERINTRA_PRED #define VP9_DEF_INTERINTRA_PROB 248 diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index eb99285f4..9b2fad5b1 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -773,28 +773,6 @@ void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) { xd->up_available, xd->left_available); } -#if CONFIG_COMP_INTRA_PRED -void vp9_build_comp_intra_predictors_mby(MACROBLOCKD *xd) { - uint8_t predictor[2][256]; - int i; - - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - predictor[0], 16, - xd->mode_info_context->mbmi.mode, - 16, xd->up_available, - xd->left_available); - vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, - predictor[1], 16, - xd->mode_info_context->mbmi.second_mode, - 16, xd->up_available, - xd->left_available); - - for (i = 0; i < 256; i++) { - xd->predictor[i] = (predictor[0][i] + predictor[1][i] + 1) >> 1; - } -} -#endif - void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd, uint8_t *upred_ptr, uint8_t *vpred_ptr, @@ -837,25 +815,6 @@ void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { 32); } -#if CONFIG_COMP_INTRA_PRED -void vp9_build_comp_intra_predictors_mbuv(MACROBLOCKD *xd) { - uint8_t predictor[2][2][64]; - int i; - - vp9_build_intra_predictors_mbuv_internal( - xd, predictor[0][0], predictor[1][0], 8, - xd->mode_info_context->mbmi.uv_mode, 8); - vp9_build_intra_predictors_mbuv_internal( - xd, predictor[0][1], predictor[1][1], 8, - xd->mode_info_context->mbmi.second_uv_mode, 8); - for (i = 0; i < 64; i++) { - xd->predictor[256 + i] = (predictor[0][0][i] + predictor[0][1][i] + 1) >> 1; - xd->predictor[256 + 64 + i] = (predictor[1][0][i] + - predictor[1][1][i] + 1) >> 1; - } -} -#endif - void vp9_intra8x8_predict(BLOCKD *xd, int mode, uint8_t *predictor) { @@ -864,24 +823,6 @@ void vp9_intra8x8_predict(BLOCKD *xd, mode, 8, 1, 1); } -#if CONFIG_COMP_INTRA_PRED -void vp9_comp_intra8x8_predict(BLOCKD *xd, - int mode, int second_mode, - uint8_t *out_predictor) { - uint8_t predictor[2][8 * 16]; - int i, j; - - vp9_intra8x8_predict(xd, mode, predictor[0]); - vp9_intra8x8_predict(xd, second_mode, predictor[1]); - - for (i = 0; i < 8 * 16; i += 16) { - for (j = i; j < i + 8; j++) { - out_predictor[j] = (predictor[0][j] + predictor[1][j] + 1) >> 1; - } - } -} -#endif - void vp9_intra_uv4x4_predict(BLOCKD *xd, int mode, uint8_t *predictor) { @@ -890,24 +831,6 @@ void vp9_intra_uv4x4_predict(BLOCKD *xd, mode, 4, 1, 1); } -#if CONFIG_COMP_INTRA_PRED -void vp9_comp_intra_uv4x4_predict(BLOCKD *xd, - int mode, int mode2, - uint8_t *out_predictor) { - uint8_t predictor[2][8 * 4]; - int i, j; - - vp9_intra_uv4x4_predict(xd, mode, predictor[0]); - vp9_intra_uv4x4_predict(xd, mode2, predictor[1]); - - for (i = 0; i < 4 * 8; i += 8) { - for (j = i; j < i + 4; j++) { - out_predictor[j] = (predictor[0][j] + predictor[1][j] + 1) >> 1; - } - } -} -#endif - /* TODO: try different ways of use Y-UV mode correlation Current code assumes that a uv 4x4 block use same mode as corresponding Y 8x8 area diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c 
index d170e43ea..da607e81c 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -412,24 +412,6 @@ void vp9_intra4x4_predict(BLOCKD *x, } } -#if CONFIG_COMP_INTRA_PRED -void vp9_comp_intra4x4_predict_c(BLOCKD *x, - int b_mode, int b_mode2, - uint8_t *out_predictor) { - uint8_t predictor[2][4 * 16]; - int i, j; - - vp9_intra4x4_predict(x, b_mode, predictor[0]); - vp9_intra4x4_predict(x, b_mode2, predictor[1]); - - for (i = 0; i < 16 * 4; i += 16) { - for (j = i; j < i + 4; j++) { - out_predictor[j] = (predictor[0][j] + predictor[1][j] + 1) >> 1; - } - } -} -#endif - /* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and * to the right prediction have filled in pixels to use. */ diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index a510f005a..39af2080a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -149,9 +149,6 @@ specialize vp9_build_intra_predictors_sbuv_s; prototype void vp9_build_intra_predictors_mby "struct macroblockd *x" specialize vp9_build_intra_predictors_mby; -prototype void vp9_build_comp_intra_predictors_mby "struct macroblockd *x" -specialize vp9_build_comp_intra_predictors_mby; - prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x" specialize vp9_build_intra_predictors_mby_s; @@ -161,9 +158,6 @@ specialize vp9_build_intra_predictors_mbuv; prototype void vp9_build_intra_predictors_mbuv_s "struct macroblockd *x" specialize vp9_build_intra_predictors_mbuv_s; -prototype void vp9_build_comp_intra_predictors_mbuv "struct macroblockd *x" -specialize vp9_build_comp_intra_predictors_mbuv; - prototype void vp9_build_intra_predictors_sb64y_s "struct macroblockd *x" specialize vp9_build_intra_predictors_sb64y_s; @@ -173,21 +167,12 @@ specialize vp9_build_intra_predictors_sb64uv_s; prototype void vp9_intra4x4_predict "struct blockd *x, int b_mode, uint8_t *predictor" specialize vp9_intra4x4_predict; -prototype void vp9_comp_intra4x4_predict "struct blockd *x, int b_mode, int second_mode, uint8_t *predictor" -specialize vp9_comp_intra4x4_predict; - prototype void vp9_intra8x8_predict "struct blockd *x, int b_mode, uint8_t *predictor" specialize vp9_intra8x8_predict; -prototype void vp9_comp_intra8x8_predict "struct blockd *x, int b_mode, int second_mode, uint8_t *predictor" -specialize vp9_comp_intra8x8_predict; - prototype void vp9_intra_uv4x4_predict "struct blockd *x, int b_mode, uint8_t *predictor" specialize vp9_intra_uv4x4_predict; -prototype void vp9_comp_intra_uv4x4_predict "struct blockd *x, int b_mode, int second_mode, uint8_t *predictor" -specialize vp9_comp_intra_uv4x4_predict; - # # Loopfilter # diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 8b39a1eb1..c6c3d1576 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -163,17 +163,11 @@ static void kfread_modes(VP9D_COMP *pbi, y_mode = (MB_PREDICTION_MODE) read_kf_mb_ymode(bc, pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]); } -#if CONFIG_COMP_INTRA_PRED - m->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif m->mbmi.ref_frame = INTRA_FRAME; if ((m->mbmi.mode = y_mode) == B_PRED) { int i = 0; -#if CONFIG_COMP_INTRA_PRED - int use_comp_pred = vp9_read(bc, DEFAULT_COMP_INTRA_PROB); -#endif do { const B_PREDICTION_MODE A = above_block_mode(m, i, mis); const B_PREDICTION_MODE L = left_block_mode(m, i); @@ -181,15 +175,6 @@ static void kfread_modes(VP9D_COMP *pbi, m->bmi[i].as_mode.first = (B_PREDICTION_MODE) 
read_kf_bmode( bc, pbi->common.kf_bmode_prob [A] [L]); -#if CONFIG_COMP_INTRA_PRED - if (use_comp_pred) { - m->bmi[i].as_mode.second = - (B_PREDICTION_MODE) read_kf_bmode( - bc, pbi->common.kf_bmode_prob [A] [L]); - } else { - m->bmi[i].as_mode.second = (B_PREDICTION_MODE)(B_DC_PRED - 1); - } -#endif } while (++i < 16); } if ((m->mbmi.mode = y_mode) == I8X8_PRED) { @@ -202,19 +187,10 @@ static void kfread_modes(VP9D_COMP *pbi, m->bmi[ib + 1].as_mode.first = mode8x8; m->bmi[ib + 4].as_mode.first = mode8x8; m->bmi[ib + 5].as_mode.first = mode8x8; -#if CONFIG_COMP_INTRA_PRED - m->bmi[ib + 0].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); - m->bmi[ib + 1].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); - m->bmi[ib + 4].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); - m->bmi[ib + 5].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif } } else m->mbmi.uv_mode = (MB_PREDICTION_MODE)read_uv_mode(bc, pbi->common.kf_uv_mode_prob[m->mbmi.mode]); -#if CONFIG_COMP_INTRA_PRED - m->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 && m->mbmi.mode <= I8X8_PRED) { @@ -1138,16 +1114,10 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, read_ymode(bc, pbi->common.fc.ymode_prob); pbi->common.fc.ymode_counts[mbmi->mode]++; } -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif // If MB mode is BPRED read the block modes if (mbmi->mode == B_PRED) { int j = 0; -#if CONFIG_COMP_INTRA_PRED - int use_comp_pred = vp9_read(bc, DEFAULT_COMP_INTRA_PROB); -#endif do { int m; m = mi->bmi[j].as_mode.first = (B_PREDICTION_MODE) @@ -1156,13 +1126,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; #endif pbi->common.fc.bmode_counts[m]++; -#if CONFIG_COMP_INTRA_PRED - if (use_comp_pred) { - mi->bmi[j].as_mode.second = (B_PREDICTION_MODE)read_bmode(bc, pbi->common.fc.bmode_prob); - } else { - mi->bmi[j].as_mode.second = (B_PREDICTION_MODE)(B_DC_PRED - 1); - } -#endif } while (++j < 16); } @@ -1177,22 +1140,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mi->bmi[ib + 4].as_mode.first = mode8x8; mi->bmi[ib + 5].as_mode.first = mode8x8; pbi->common.fc.i8x8_mode_counts[mode8x8]++; -#if CONFIG_COMP_INTRA_PRED - mi->bmi[ib + 0].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); - mi->bmi[ib + 1].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); - mi->bmi[ib + 4].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); - mi->bmi[ib + 5].as_mode.second = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif } } else { mbmi->uv_mode = (MB_PREDICTION_MODE)read_uv_mode( bc, pbi->common.fc.uv_mode_prob[mbmi->mode]); pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++; } - -#if CONFIG_COMP_INTRA_PRED - mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif } if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 && diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 685491ff5..361de33b9 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -418,9 +418,6 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, assert(get_2nd_order_usage(xd) == 0); for (i = 0; i < 16; i++) { int b_mode; -#if CONFIG_COMP_INTRA_PRED - int b_mode2; -#endif BLOCKD *b = &xd->block[i]; b_mode = xd->mode_info_context->bmi[i].as_mode.first; #if CONFIG_NEWBINTRAMODES @@ -429,17 +426,8 @@ static void 
decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, #endif if (!xd->mode_info_context->mbmi.mb_skip_coeff) eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i); -#if CONFIG_COMP_INTRA_PRED - b_mode2 = xd->mode_info_context->bmi[i].as_mode.second; - if (b_mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { -#endif - vp9_intra4x4_predict(b, b_mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra4x4_predict(b, b_mode, b_mode2, b->predictor); - } -#endif + vp9_intra4x4_predict(b, b_mode, b->predictor); tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index d8659cbf8..61aac5cd1 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -778,23 +778,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } if (mode == B_PRED) { int j = 0; -#if CONFIG_COMP_INTRA_PRED - int uses_second = - m->bmi[0].as_mode.second != - (B_PREDICTION_MODE)(B_DC_PRED - 1); - vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB); -#endif do { -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second; -#endif write_bmode(bc, m->bmi[j].as_mode.first, pc->fc.bmode_prob); -#if CONFIG_COMP_INTRA_PRED - if (uses_second) { - write_bmode(bc, mode2, pc->fc.bmode_prob); - } -#endif } while (++j < 16); } if (mode == I8X8_PRED) { @@ -1025,30 +1011,16 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, if (ym == B_PRED) { int i = 0; -#if CONFIG_COMP_INTRA_PRED - int uses_second = - m->bmi[0].as_mode.second != - (B_PREDICTION_MODE)(B_DC_PRED - 1); - vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB); -#endif do { const B_PREDICTION_MODE A = above_block_mode(m, i, mis); const B_PREDICTION_MODE L = left_block_mode(m, i); const int bm = m->bmi[i].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - const int bm2 = m->bmi[i].as_mode.second; -#endif #ifdef ENTROPY_STATS ++intra_mode_stats [A] [L] [bm]; #endif write_kf_bmode(bc, bm, c->kf_bmode_prob[A][L]); -#if CONFIG_COMP_INTRA_PRED - if (uses_second) { - write_kf_bmode(bc, bm2, c->kf_bmode_prob[A][L]); - } -#endif } while (++i < 16); } if (ym == I8X8_PRED) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 9b106266e..ce9a38003 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -25,9 +25,6 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { if (use_16x16_pred) { mbmi->mode = DC_PRED; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif mbmi->uv_mode = DC_PRED; mbmi->ref_frame = INTRA_FRAME; @@ -53,17 +50,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { b->bmi.as_mode.context = vp9_find_bpred_context(b); #endif -#if CONFIG_COMP_INTRA_PRED - if (b->bmi.as_mode.second == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { -#endif - vp9_intra4x4_predict(b, b->bmi.as_mode.first, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra4x4_predict(b, b->bmi.as_mode.first, b->bmi.as_mode.second, - b->predictor); - } -#endif - + vp9_intra4x4_predict(b, b->bmi.as_mode.first, b->predictor); vp9_subtract_b(be, b, 16); tx_type = get_tx_type_4x4(&x->e_mbd, b); @@ -93,14 +80,7 @@ void vp9_encode_intra16x16mby(MACROBLOCK *x) { BLOCK *b = &x->block[0]; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; -#if CONFIG_COMP_INTRA_PRED - if (xd->mode_info_context->mbmi.second_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) -#endif - vp9_build_intra_predictors_mby(xd); -#if 
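/* The u/v indices above (i + 16, i + 20) follow the macroblock's BLOCKD
 * layout: entries 0-15 are the luma 4x4 blocks, 16-19 the U 4x4 blocks and
 * 20-23 the V 4x4 blocks, so chroma block i pairs U index i + 16 with V
 * index i + 20. A sketch of the pairing; the wrapper name is hypothetical: */
enum { U_BASE = 16, V_BASE = 20 };
static void encode_chroma_pair(MACROBLOCK *x, int i, int mode) {
  encode_intra_uv4x4(x, U_BASE + i, mode);  /* U plane 4x4 */
  encode_intra_uv4x4(x, V_BASE + i, mode);  /* V plane 4x4 */
}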
CONFIG_COMP_INTRA_PRED - else - vp9_build_comp_intra_predictors_mby(xd); -#endif + vp9_build_intra_predictors_mby(xd); vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride); @@ -131,15 +111,7 @@ void vp9_encode_intra16x16mbuv(MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; -#if CONFIG_COMP_INTRA_PRED - if (xd->mode_info_context->mbmi.second_uv_mode == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_build_intra_predictors_mbuv(xd); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_build_comp_intra_predictors_mbuv(xd); - } -#endif + vp9_build_intra_predictors_mbuv(xd); vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, xd->predictor, x->src.uv_stride); @@ -169,16 +141,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { int i; TX_TYPE tx_type; -#if CONFIG_COMP_INTRA_PRED - if (b->bmi.as_mode.second == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_intra8x8_predict(b, b->bmi.as_mode.first, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra8x8_predict(b, b->bmi.as_mode.first, b->bmi.as_mode.second, - b->predictor); - } -#endif + vp9_intra8x8_predict(b, b->bmi.as_mode.first, b->predictor); // generate residual blocks vp9_subtract_4b_c(be, b, 16); @@ -231,20 +194,12 @@ void vp9_encode_intra8x8mby(MACROBLOCK *x) { } } -void vp9_encode_intra_uv4x4(MACROBLOCK *x, int ib, - int mode, int second) { +static void encode_intra_uv4x4(MACROBLOCK *x, int ib, + int mode) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; -#if CONFIG_COMP_INTRA_PRED - if (second == -1) { -#endif - vp9_intra_uv4x4_predict(b, mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra_uv4x4_predict(b, mode, second, b->predictor); - } -#endif + vp9_intra_uv4x4_predict(b, mode, b->predictor); vp9_subtract_b(be, b, 8); @@ -257,21 +212,17 @@ void vp9_encode_intra_uv4x4(MACROBLOCK *x, int ib, } void vp9_encode_intra8x8mbuv(MACROBLOCK *x) { - int i, ib, mode, second; + int i, ib, mode; BLOCKD *b; for (i = 0; i < 4; i++) { ib = vp9_i8x8_block[i]; b = &x->e_mbd.block[ib]; mode = b->bmi.as_mode.first; -#if CONFIG_COMP_INTRA_PRED - second = b->bmi.as_mode.second; -#else - second = -1; -#endif + /*u */ - vp9_encode_intra_uv4x4(x, i + 16, mode, second); + encode_intra_uv4x4(x, i + 16, mode); /*v */ - vp9_encode_intra_uv4x4(x, i + 20, mode, second); + encode_intra_uv4x4(x, i + 20, mode); } } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 73d5c5adc..573c9a7e4 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1082,20 +1082,12 @@ static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, BLOCKD *b, B_PREDICTION_MODE *best_mode, -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE *best_second_mode, - int allow_comp, -#endif int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int *bestdistortion) { B_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; - -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE mode2; -#endif int64_t best_rd = LLONG_MAX; int rate = 0; int distortion; @@ -1116,100 +1108,63 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, b->bmi.as_mode.context = vp9_find_bpred_context(b); #endif for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { -#if CONFIG_COMP_INTRA_PRED - for (mode2 = (allow_comp ? 0 : (B_DC_PRED - 1)); - mode2 != (allow_comp ? 
(mode + 1) : 0); mode2++) { -#endif - int64_t this_rd; - int ratey; + int64_t this_rd; + int ratey; #if CONFIG_NEWBINTRAMODES - if (xd->frame_type == KEY_FRAME) { - if (mode == B_CONTEXT_PRED) continue; -#if CONFIG_COMP_INTRA_PRED - if (mode2 == B_CONTEXT_PRED) continue; -#endif - } else { - if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS && - mode < B_CONTEXT_PRED) - continue; -#if CONFIG_COMP_INTRA_PRED - if (mode2 >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS && - mode2 < B_CONTEXT_PRED) - continue; -#endif - } -#endif - - b->bmi.as_mode.first = mode; -#if CONFIG_NEWBINTRAMODES - rate = bmode_costs[ - mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode]; -#else - rate = bmode_costs[mode]; -#endif - -#if CONFIG_COMP_INTRA_PRED - if (mode2 == (B_PREDICTION_MODE)(B_DC_PRED - 1)) { -#endif - vp9_intra4x4_predict(b, mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - vp9_comp_intra4x4_predict(b, mode, mode2, b->predictor); -#if CONFIG_NEWBINTRAMODES - rate += bmode_costs[ - mode2 == B_CONTEXT_PRED ? - mode2 - CONTEXT_PRED_REPLACEMENTS : mode2]; -#else - rate += bmode_costs[mode2]; -#endif - } -#endif - vp9_subtract_b(be, b, 16); - - b->bmi.as_mode.first = mode; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); - vp9_ht_quantize_b_4x4(be, b, tx_type); - } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b); - } - - tempa = ta; - templ = tl; - - ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); - rate += ratey; - distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; - - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = ratey; - *bestdistortion = distortion; - best_rd = this_rd; - *best_mode = mode; - best_tx_type = tx_type; - -#if CONFIG_COMP_INTRA_PRED - *best_second_mode = mode2; -#endif - *a = tempa; - *l = templ; - copy_predictor(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); - } -#if CONFIG_COMP_INTRA_PRED + if (xd->frame_type == KEY_FRAME) { + if (mode == B_CONTEXT_PRED) continue; + } else { + if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS && + mode < B_CONTEXT_PRED) + continue; } #endif + + b->bmi.as_mode.first = mode; +#if CONFIG_NEWBINTRAMODES + rate = bmode_costs[ + mode == B_CONTEXT_PRED ? 
mode - CONTEXT_PRED_REPLACEMENTS : mode]; +#else + rate = bmode_costs[mode]; +#endif + + vp9_intra4x4_predict(b, mode, b->predictor); + vp9_subtract_b(be, b, 16); + + b->bmi.as_mode.first = mode; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4); + vp9_ht_quantize_b_4x4(be, b, tx_type); + } else { + x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4(be, b); + } + + tempa = ta; + templ = tl; + + ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); + rate += ratey; + distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; + + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + + if (this_rd < best_rd) { + *bestrate = rate; + *bestratey = ratey; + *bestdistortion = distortion; + best_rd = this_rd; + *best_mode = mode; + best_tx_type = tx_type; + *a = tempa; + *l = templ; + copy_predictor(best_predictor, b->predictor); + vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); + } } b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode); -#if CONFIG_COMP_INTRA_PRED - b->bmi.as_mode.second = (B_PREDICTION_MODE)(*best_second_mode); -#endif // inverse transform if (best_tx_type != DCT_DCT) @@ -1222,12 +1177,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, return best_rd; } -static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rate, - int *rate_y, int *Distortion, int64_t best_rd, -#if CONFIG_COMP_INTRA_PRED - int allow_comp, -#endif - int update_contexts) { +static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, + int *Rate, int *rate_y, + int *Distortion, int64_t best_rd, + int update_contexts) { int i; MACROBLOCKD *const xd = &mb->e_mbd; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; @@ -1258,9 +1211,6 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode); -#endif int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); if (xd->frame_type == KEY_FRAME) { @@ -1275,9 +1225,6 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat total_rd += rd_pick_intra4x4block( cpi, mb, mb->block + i, xd->block + i, &best_mode, -#if CONFIG_COMP_INTRA_PRED - & best_second_mode, allow_comp, -#endif bmode_costs, ta + vp9_block2above[TX_4X4][i], tl + vp9_block2left[TX_4X4][i], &r, &ry, &d); @@ -1286,9 +1233,6 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat tot_rate_y += ry; mic->bmi[i].as_mode.first = best_mode; -#if CONFIG_COMP_INTRA_PRED - mic->bmi[i].as_mode.second = best_second_mode; -#endif #if 0 // CONFIG_NEWBINTRAMODES printf("%d %d\n", mic->bmi[i].as_mode.first, mic->bmi[i].as_mode.context); @@ -1301,9 +1245,6 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int *Rat if (total_rd >= best_rd) return LLONG_MAX; -#if CONFIG_COMP_INTRA_PRED - cost += vp9_cost_bit(128, allow_comp); -#endif *Rate = cost; *rate_y = tot_rate_y; *Distortion = distortion; @@ -1401,10 +1342,6 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, MB_PREDICTION_MODE mode; TX_SIZE txfm_size = 0; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE mode2; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected); -#endif MACROBLOCKD 
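/* Every candidate in these mode loops is compared through the same scalar,
 * this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion), a fixed-point
 * Lagrangian rate-distortion sum. The macro is defined elsewhere in the
 * tree; a sketch in the vp8 lineage, with the exact rounding and scaling
 * treated as an assumption: */
#define RDCOST_SKETCH(RM, DM, R, D) ((((R) * (RM) + 128) >> 8) + (DM) * (D))
/* typical use: keep the candidate whose cost is lowest
 *   int64_t this_rd = RDCOST_SKETCH(x->rdmult, x->rddiv, rate, distortion);
 *   if (this_rd < best_rd) { best_rd = this_rd; mode_selected = mode; }    */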
*const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int rate, ratey; @@ -1422,76 +1359,49 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, mbmi->mode = mode; -#if CONFIG_COMP_INTRA_PRED - for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { - mbmi->second_mode = mode2; - if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_build_intra_predictors_mby(xd); -#if CONFIG_COMP_INTRA_PRED - } else { - continue; // i.e. disable for now - vp9_build_comp_intra_predictors_mby(xd); - } -#endif + vp9_build_intra_predictors_mby(xd); - macro_block_yrd(cpi, x, &ratey, &distortion, &skip, local_txfm_cache); + macro_block_yrd(cpi, x, &ratey, &distortion, &skip, local_txfm_cache); - // FIXME add compoundmode cost - // FIXME add rate for mode2 - rate = ratey + x->mbmode_cost[xd->frame_type][mbmi->mode]; + // FIXME add compoundmode cost + // FIXME add rate for mode2 + rate = ratey + x->mbmode_cost[xd->frame_type][mbmi->mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_rd) { - mode_selected = mode; - txfm_size = mbmi->txfm_size; -#if CONFIG_COMP_INTRA_PRED - mode2_selected = mode2; -#endif - best_rd = this_rd; - *Rate = rate; - *rate_y = ratey; - *Distortion = distortion; - *skippable = skip; - } - - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd = this_rd + local_txfm_cache[i] - - local_txfm_cache[cpi->common.txfm_mode]; - if (adj_rd < txfm_cache[i]) { - txfm_cache[i] = adj_rd; - } - } - -#if CONFIG_COMP_INTRA_PRED + if (this_rd < best_rd) { + mode_selected = mode; + txfm_size = mbmi->txfm_size; + best_rd = this_rd; + *Rate = rate; + *rate_y = ratey; + *Distortion = distortion; + *skippable = skip; + } + + for (i = 0; i < NB_TXFM_MODES; i++) { + int64_t adj_rd = this_rd + local_txfm_cache[i] - + local_txfm_cache[cpi->common.txfm_mode]; + if (adj_rd < txfm_cache[i]) { + txfm_cache[i] = adj_rd; + } } -#endif } mbmi->txfm_size = txfm_size; mbmi->mode = mode_selected; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = mode2_selected; -#endif return best_rd; } static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE *best_second_mode, -#endif int *mode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int *bestdistortion) { MB_PREDICTION_MODE mode; -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE mode2; -#endif MACROBLOCKD *xd = &x->e_mbd; int64_t best_rd = LLONG_MAX; int distortion = 0, rate = 0; @@ -1513,107 +1423,86 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int idx = (ib & 0x02) ? (ib + 2) : ib; for (mode = DC_PRED; mode <= TM_PRED; mode++) { -#if CONFIG_COMP_INTRA_PRED - for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { -#endif - int64_t this_rd; - int rate_t = 0; + int64_t this_rd; + int rate_t = 0; - // FIXME rate for compound mode and second intrapred mode - rate = mode_costs[mode]; - b->bmi.as_mode.first = mode; + // FIXME rate for compound mode and second intrapred mode + rate = mode_costs[mode]; + b->bmi.as_mode.first = mode; -#if CONFIG_COMP_INTRA_PRED - if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_intra8x8_predict(b, mode, b->predictor); -#if CONFIG_COMP_INTRA_PRED - } else { - continue; // i.e. 
disable for now - vp9_comp_intra8x8_predict(b, mode, mode2, b->predictor); - } -#endif + vp9_intra8x8_predict(b, mode, b->predictor); - vp9_subtract_4b_c(be, b, 16); + vp9_subtract_4b_c(be, b, 16); - assert(get_2nd_order_usage(xd) == 0); - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - TX_TYPE tx_type = get_tx_type_8x8(xd, b); - if (tx_type != DCT_DCT) - vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); - else - x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); - x->quantize_b_8x8(x->block + idx, xd->block + idx); + assert(get_2nd_order_usage(xd) == 0); + if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { + TX_TYPE tx_type = get_tx_type_8x8(xd, b); + if (tx_type != DCT_DCT) + vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); + else + x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + x->quantize_b_8x8(x->block + idx, xd->block + idx); - // compute quantization mse of 8x8 block - distortion = vp9_block_error_c((x->block + idx)->coeff, - (xd->block + idx)->dqcoeff, 64); - ta0 = a[vp9_block2above[TX_8X8][idx]]; - tl0 = l[vp9_block2left[TX_8X8][idx]]; + // compute quantization mse of 8x8 block + distortion = vp9_block_error_c((x->block + idx)->coeff, + (xd->block + idx)->dqcoeff, 64); + ta0 = a[vp9_block2above[TX_8X8][idx]]; + tl0 = l[vp9_block2left[TX_8X8][idx]]; - rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, - &ta0, &tl0, TX_8X8); + rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, + &ta0, &tl0, TX_8X8); - rate += rate_t; - ta1 = ta0; - tl1 = tl0; - } else { - static const int iblock[4] = {0, 1, 4, 5}; - TX_TYPE tx_type; - int i; - ta0 = a[vp9_block2above[TX_4X4][ib]]; - ta1 = a[vp9_block2above[TX_4X4][ib + 1]]; - tl0 = l[vp9_block2left[TX_4X4][ib]]; - tl1 = l[vp9_block2left[TX_4X4][ib + 4]]; - distortion = 0; - rate_t = 0; - for (i = 0; i < 4; ++i) { - b = &xd->block[ib + iblock[i]]; - be = &x->block[ib + iblock[i]]; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); - vp9_ht_quantize_b_4x4(be, b, tx_type); - } else { - x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_b_4x4(be, b); - } - distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16); - rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, - // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, - &ta0, &tl0, - TX_4X4); + rate += rate_t; + ta1 = ta0; + tl1 = tl0; + } else { + static const int iblock[4] = {0, 1, 4, 5}; + TX_TYPE tx_type; + int i; + ta0 = a[vp9_block2above[TX_4X4][ib]]; + ta1 = a[vp9_block2above[TX_4X4][ib + 1]]; + tl0 = l[vp9_block2left[TX_4X4][ib]]; + tl1 = l[vp9_block2left[TX_4X4][ib + 4]]; + distortion = 0; + rate_t = 0; + for (i = 0; i < 4; ++i) { + b = &xd->block[ib + iblock[i]]; + be = &x->block[ib + iblock[i]]; + tx_type = get_tx_type_4x4(xd, b); + if (tx_type != DCT_DCT) { + vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); + vp9_ht_quantize_b_4x4(be, b, tx_type); + } else { + x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4(be, b); } - rate += rate_t; + distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16); + rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, + // i&1 ? &ta1 : &ta0, i&2 ? 
&tl1 : &tl0, + &ta0, &tl0, + TX_4X4); } + rate += rate_t; + } - distortion >>= 2; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = rate_t; - *bestdistortion = distortion; - besta0 = ta0; - besta1 = ta1; - bestl0 = tl0; - bestl1 = tl1; - best_rd = this_rd; - *best_mode = mode; -#if CONFIG_COMP_INTRA_PRED - *best_second_mode = mode2; -#endif - copy_predictor_8x8(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 64); - vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64); -#if CONFIG_COMP_INTRA_PRED - } -#endif + distortion >>= 2; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + if (this_rd < best_rd) { + *bestrate = rate; + *bestratey = rate_t; + *bestdistortion = distortion; + besta0 = ta0; + besta1 = ta1; + bestl0 = tl0; + bestl1 = tl1; + best_rd = this_rd; + *best_mode = mode; + copy_predictor_8x8(best_predictor, b->predictor); + vpx_memcpy(best_dqcoeff, b->dqcoeff, 64); + vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64); } } b->bmi.as_mode.first = (*best_mode); -#if CONFIG_COMP_INTRA_PRED - b->bmi.as_mode.second = (*best_second_mode); -#endif vp9_encode_intra8x8(x, ib); if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { @@ -1656,25 +1545,16 @@ static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, for (i = 0; i < 4; i++) { MODE_INFO *const mic = xd->mode_info_context; B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); -#if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_second_mode); -#endif int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); ib = vp9_i8x8_block[i]; total_rd += rd_pick_intra8x8block( cpi, mb, ib, &best_mode, -#if CONFIG_COMP_INTRA_PRED - & best_second_mode, -#endif i8x8mode_costs, ta, tl, &r, &ry, &d); cost += r; distortion += d; tot_rate_y += ry; mic->bmi[ib].as_mode.first = best_mode; -#if CONFIG_COMP_INTRA_PRED - mic->bmi[ib].as_mode.second = best_second_mode; -#endif } *Rate = cost; @@ -1887,10 +1767,6 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, int *skippable) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); -#if CONFIG_COMP_INTRA_PRED - MB_PREDICTION_MODE mode2; - MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode2_selected); -#endif MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; int64_t best_rd = LLONG_MAX; @@ -1898,50 +1774,33 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, int rate_to, UNINITIALIZED_IS_SAFE(skip); for (mode = DC_PRED; mode <= TM_PRED; mode++) { -#if CONFIG_COMP_INTRA_PRED - for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { -#endif - int rate; - int distortion; - int64_t this_rd; + int rate; + int distortion; + int64_t this_rd; - mbmi->uv_mode = mode; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_uv_mode = mode2; - if (mode2 == (MB_PREDICTION_MODE)(DC_PRED - 1)) { -#endif - vp9_build_intra_predictors_mbuv(&x->e_mbd); -#if CONFIG_COMP_INTRA_PRED - } else { - continue; - vp9_build_comp_intra_predictors_mbuv(&x->e_mbd); - } -#endif + mbmi->uv_mode = mode; + vp9_build_intra_predictors_mbuv(&x->e_mbd); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - vp9_transform_mbuv_4x4(x); - vp9_quantize_mbuv_4x4(x); + vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, + x->e_mbd.predictor, x->src.uv_stride); + vp9_transform_mbuv_4x4(x); + vp9_quantize_mbuv_4x4(x); - rate_to = rd_cost_mbuv_4x4(x, 1); - rate = rate_to - + 
x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; + rate_to = rd_cost_mbuv_4x4(x, 1); + rate = rate_to + + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; - distortion = vp9_mbuverror(x) / 4; + distortion = vp9_mbuverror(x) / 4; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_rd) { - skip = vp9_mbuv_is_skippable_4x4(xd); - best_rd = this_rd; - d = distortion; - r = rate; - *rate_tokenonly = rate_to; - mode_selected = mode; -#if CONFIG_COMP_INTRA_PRED - mode2_selected = mode2; - } -#endif + if (this_rd < best_rd) { + skip = vp9_mbuv_is_skippable_4x4(xd); + best_rd = this_rd; + d = distortion; + r = rate; + *rate_tokenonly = rate_to; + mode_selected = mode; } } @@ -1950,9 +1809,6 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, *skippable = skip; mbmi->uv_mode = mode_selected; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_uv_mode = mode2_selected; -#endif } static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, @@ -3100,24 +2956,17 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, x->mv_best_ref_index[ref_frame] = best_index; } -static void set_i8x8_block_modes(MACROBLOCK *x, int modes[2][4]) { +static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) { int i; MACROBLOCKD *xd = &x->e_mbd; for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; - xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[0][i]; - xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[0][i]; - xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[0][i]; - xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[0][i]; -#if CONFIG_COMP_INTRA_PRED - xd->mode_info_context->bmi[ib + 0].as_mode.second = modes[1][i]; - xd->mode_info_context->bmi[ib + 1].as_mode.second = modes[1][i]; - xd->mode_info_context->bmi[ib + 4].as_mode.second = modes[1][i]; - xd->mode_info_context->bmi[ib + 5].as_mode.second = modes[1][i]; -#endif - // printf("%d,%d,%d,%d %d,%d,%d,%d\n", - // modes[0][0], modes[0][1], modes[0][2], modes[0][3], - // modes[1][0], modes[1][1], modes[1][2], modes[1][3]); + xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[i]; + xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[i]; + xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[i]; + xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[i]; + // printf("%d,%d,%d,%d\n", + // modes[0], modes[1], modes[2], modes[3]); } for (i = 0; i < 16; i++) { @@ -3676,7 +3525,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE best_mode = DC_PRED; MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; int i, best_mode_index = 0; - int mode8x8[2][4]; + int mode8x8[4]; unsigned char segment_id = mbmi->segment_id; int mode_index; @@ -3834,10 +3683,6 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; // current coding mode under rate-distortion optimization test loop -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); - mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif #if CONFIG_COMP_INTERINTRA_PRED mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); @@ -3952,10 +3797,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Note the rate value returned here includes the cost of coding // the BPRED mode : x->mbmode_cost[xd->frame_type][BPRED]; mbmi->txfm_size = TX_4X4; - tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd, -#if 
CONFIG_COMP_INTRA_PRED - 0, -#endif + tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, + &distortion, best_yrd, cpi->update_context); rate2 += rate; rate2 += intra_cost_penalty; @@ -3981,16 +3824,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = TX_4X4; tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4, &d4x4, best_yrd); - mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif + mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; + mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; + mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; + mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; mbmi->txfm_size = TX_8X8; tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8, &d8x8, best_yrd); @@ -4014,16 +3851,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = TX_8X8; tmp_rd = tmp_rd_8x8s; - mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif + mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; + mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; + mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; + mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; } } else if (cm->txfm_mode == ONLY_4X4) { rate = r4x4; @@ -4038,16 +3869,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = TX_8X8; tmp_rd = tmp_rd_8x8; - mode8x8[0][0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3] = xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif + mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; + mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; + mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; + mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; } rate2 += rate; @@ -4519,10 +4344,6 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; int64_t error4x4, error16x16; -#if CONFIG_COMP_INTRA_PRED - int64_t error4x4d; - int rate4x4d, dist4x4d; -#endif int rate4x4, rate16x16 = 0, rateuv, rateuv8x8; int dist4x4 = 0, dist16x16 = 0, distuv = 
0, distuv8x8 = 0; int rate; @@ -4533,7 +4354,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int rate8x8_tokenonly=0; int rate8x8, dist8x8; int mode16x16; - int mode8x8[2][4]; + int mode8x8[4]; int dist; int modeuv, uv_intra_skippable, uv_intra_skippable_8x8; int y_intra16x16_skippable = 0; @@ -4566,30 +4387,15 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8; error8x8 = rd_pick_intra8x8mby_modes(cpi, x, &rate8x8, &rate8x8_tokenonly, &dist8x8, error16x16); - mode8x8[0][0]= xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[0][1]= xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[0][2]= xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[0][3]= xd->mode_info_context->bmi[10].as_mode.first; -#if CONFIG_COMP_INTRA_PRED - mode8x8[1][0] = xd->mode_info_context->bmi[0].as_mode.second; - mode8x8[1][1] = xd->mode_info_context->bmi[2].as_mode.second; - mode8x8[1][2] = xd->mode_info_context->bmi[8].as_mode.second; - mode8x8[1][3] = xd->mode_info_context->bmi[10].as_mode.second; -#endif + mode8x8[0]= xd->mode_info_context->bmi[0].as_mode.first; + mode8x8[1]= xd->mode_info_context->bmi[2].as_mode.first; + mode8x8[2]= xd->mode_info_context->bmi[8].as_mode.first; + mode8x8[3]= xd->mode_info_context->bmi[10].as_mode.first; error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, error16x16, -#if CONFIG_COMP_INTRA_PRED - 0, -#endif cpi->update_context); -#if CONFIG_COMP_INTRA_PRED - error4x4d = rd_pick_intra4x4mby_modes(cpi, x, - &rate4x4d, &rate4x4_tokenonly, - &dist4x4d, error16x16, 1, - cpi->update_context); -#endif mbmi->mb_skip_coeff = 0; if (cpi->common.mb_no_coeff_skip && @@ -4606,17 +4412,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff)); } else if (error8x8 > error16x16) { if (error4x4 < error16x16) { - rate = rateuv; -#if CONFIG_COMP_INTRA_PRED - rate += (error4x4d < error4x4) ? rate4x4d : rate4x4; - if (error4x4d >= error4x4) // FIXME save original modes etc. - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, - &rate4x4_tokenonly, - &dist4x4, error16x16, 0, - cpi->update_context); -#else - rate += rate4x4; -#endif + rate = rateuv + rate4x4; mbmi->mode = B_PRED; mbmi->txfm_size = TX_4X4; dist = dist4x4 + (distuv >> 2); @@ -4636,17 +4432,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); } else { if (error4x4 < error8x8) { - rate = rateuv; -#if CONFIG_COMP_INTRA_PRED - rate += (error4x4d < error4x4) ? rate4x4d : rate4x4; - if (error4x4d >= error4x4) // FIXME save original modes etc. 
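/* [Editor's sketch -- not part of the patch.]  With the compound-intra
 * (CONFIG_COMP_INTRA_PRED) branches removed above, the intra block-size
 * decision reduces to a plain best-cost scan over the candidate
 * predictions.  The stand-alone sketch below illustrates that pattern;
 * rd_cost() is a hypothetical stand-in for the encoder's RDCOST macro,
 * and its exact weighting here is an assumption for illustration only. */
#include <stdint.h>

static int64_t rd_cost(int rdmult, int rddiv, int rate, int64_t dist) {
  /* Assumed Lagrangian form: scaled rate plus shifted distortion. */
  return (((int64_t)rate * rdmult) >> 8) + (dist << rddiv);
}

static int pick_intra_size(const int rate[3], const int64_t dist[3],
                           int rdmult, int rddiv) {
  /* Index 0: 4x4 (B_PRED), 1: 8x8 (I8X8_PRED), 2: 16x16. */
  int i, best = 0;
  int64_t best_rd = INT64_MAX;
  for (i = 0; i < 3; i++) {
    const int64_t this_rd = rd_cost(rdmult, rddiv, rate[i], dist[i]);
    if (this_rd < best_rd) {  /* keep the cheapest prediction size */
      best_rd = this_rd;
      best = i;
    }
  }
  return best;
}
/* [End editor's sketch.] */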
- error4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, - &rate4x4_tokenonly, - &dist4x4, error16x16, 0, - cpi->update_context); -#else - rate += rate4x4; -#endif + rate = rateuv + rate4x4; mbmi->mode = B_PRED; mbmi->txfm_size = TX_4X4; dist = dist4x4 + (distuv >> 2); @@ -4817,10 +4603,6 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, comp_pred = mbmi->second_ref_frame > INTRA_FRAME; mbmi->mode = this_mode; mbmi->uv_mode = DC_PRED; -#if CONFIG_COMP_INTRA_PRED - mbmi->second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); - mbmi->second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif #if CONFIG_COMP_INTERINTRA_PRED mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); From 24bc1a7189724e1b67703a6165ec7e170ce784d6 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Mon, 14 Jan 2013 11:49:30 -0800 Subject: [PATCH 76/77] Use INT64_MAX instead of LLONG_MAX These variables have the type int64_t, not long long. long long could be a larger type than 64 bits. Emulate INT64_MAX for older versions of MSVC, and remove the unreferenced vpx_ports/vpxtypes.h Change-Id: Ideaca71838fcd3849d816d5ab17aa347c97d03b0 --- vp9/common/vp9_implicit_segmentation.c | 4 +- vp9/encoder/vp9_firstpass.c | 2 +- vp9/encoder/vp9_onyx_if.c | 5 +- vp9/encoder/vp9_rdopt.c | 84 ++++++------- vpx/vpx_integer.h | 1 + vpx_ports/vpxtypes.h | 166 ------------------------- 6 files changed, 47 insertions(+), 215 deletions(-) delete mode 100644 vpx_ports/vpxtypes.h diff --git a/vp9/common/vp9_implicit_segmentation.c b/vp9/common/vp9_implicit_segmentation.c index 472c3d1a5..e88eec48c 100644 --- a/vp9/common/vp9_implicit_segmentation.c +++ b/vp9/common/vp9_implicit_segmentation.c @@ -33,8 +33,8 @@ typedef struct { int min_y; int max_x; int max_y; - long long sum_x; - long long sum_y; + int64_t sum_x; + int64_t sum_y; int pixels; int seg_value; int label; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 44b140319..db981754e 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -794,7 +794,7 @@ static double bitcost(double prob) { return -(log(prob) / log(2.0)); } -static long long estimate_modemvcost(VP9_COMP *cpi, +static int64_t estimate_modemvcost(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats) { #if 0 int mv_cost; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index b767ff0b3..fbdc2d2cc 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1727,10 +1727,7 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { #endif -#ifndef LLONG_MAX -#define LLONG_MAX 9223372036854775807LL -#endif - cpi->first_time_stamp_ever = LLONG_MAX; + cpi->first_time_stamp_ever = INT64_MAX; cpi->frames_till_gf_update_due = 0; cpi->key_frame_count = 1; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 573c9a7e4..823476fb3 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1088,7 +1088,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, int *bestdistortion) { B_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX; int rate = 0; int distortion; @@ -1243,7 +1243,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, } if (total_rd >= best_rd) - return LLONG_MAX; + return INT64_MAX; *Rate = cost; *rate_y = tot_rate_y; @@ -1263,7 +1263,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MB_PREDICTION_MODE 
UNINITIALIZED_IS_SAFE(mode_selected); int this_rate, this_rate_tokenonly; int this_distortion, s; - int64_t best_rd = LLONG_MAX, this_rd; + int64_t best_rd = INT64_MAX, this_rd; /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -1303,7 +1303,7 @@ static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int this_rate, this_rate_tokenonly; int this_distortion, s; - int64_t best_rd = LLONG_MAX, this_rd; + int64_t best_rd = INT64_MAX, this_rd; /* Y Search for 32x32 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -1346,12 +1346,12 @@ static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int rate, ratey; int distortion, skip; - int64_t best_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX; int64_t this_rd; int i; for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = LLONG_MAX; + txfm_cache[i] = INT64_MAX; // Y Search for 16x16 intra prediction mode for (mode = DC_PRED; mode <= TM_PRED; mode++) { @@ -1403,7 +1403,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int *bestdistortion) { MB_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX; int distortion = 0, rate = 0; BLOCK *be = x->block + ib; BLOCKD *b = xd->block + ib; @@ -1528,7 +1528,7 @@ static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED]; int distortion = 0; int tot_rate_y = 0; - long long total_rd = 0; + int64_t total_rd = 0; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; int *i8x8mode_costs; @@ -1769,7 +1769,7 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX; int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); int rate_to, UNINITIALIZED_IS_SAFE(skip); @@ -1821,7 +1821,7 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t best_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX; int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); int rate_to, UNINITIALIZED_IS_SAFE(skip); @@ -2010,7 +2010,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, int *skippable) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = LLONG_MAX, this_rd; + int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate; int this_distortion, s; @@ -2047,7 +2047,7 @@ static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, int *skippable) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - int64_t best_rd = LLONG_MAX, this_rd; + int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate; int this_distortion, s; @@ -2442,7 +2442,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mbmi->txfm_size = tx_size; for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; - int64_t best_label_rd = LLONG_MAX, best_other_rd = LLONG_MAX; + int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int 
bestlabelyrate = 0; @@ -2780,7 +2780,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, vpx_memset(&bsi, 0, sizeof(bsi)); for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = LLONG_MAX; + txfm_cache[i] = INT64_MAX; bsi.segment_rd = best_rd; bsi.ref_mv = best_ref_mv; @@ -3238,7 +3238,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (is_comp_pred) { if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV || frame_mv[NEWMV][refs[1]].as_int == INVALID_MV) - return LLONG_MAX; + return INT64_MAX; *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]], &ref_mv[0], x->nmvjointcost, x->mvcost, 96, @@ -3312,7 +3312,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // near) is 0,0 as this should then be coded using the zeromv mode. for (i = 0; i < num_refs; ++i) if (frame_mv[this_mode][refs[i]].as_int == 0) - return LLONG_MAX; + return INT64_MAX; case ZEROMV: default: break; @@ -3322,7 +3322,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Clip "next_nearest" so that it does not extend to far out of image clamp_mv2(&cur_mv[i], xd); if (mv_check_bounds(x, &cur_mv[i])) - return LLONG_MAX; + return INT64_MAX; mbmi->mv[i].as_int = cur_mv[i].as_int; } @@ -3536,13 +3536,13 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_txfm_diff[NB_TXFM_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; int64_t best_pred_rd[NB_PREDICTION_TYPES]; - int64_t best_rd = LLONG_MAX, best_intra_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX; #if CONFIG_COMP_INTERINTRA_PRED int is_best_interintra = 0; - int64_t best_intra16_rd = LLONG_MAX; + int64_t best_intra16_rd = INT64_MAX; int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; #endif - int64_t best_overall_rd = LLONG_MAX; + int64_t best_overall_rd = INT64_MAX; INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int uv_intra_skippable = 0; @@ -3550,7 +3550,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int uv_intra_skippable_8x8 = 0; int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); int distortion_uv = INT_MAX; - int64_t best_yrd = LLONG_MAX; + int64_t best_yrd = INT64_MAX; int switchable_filter_index = 0; MB_PREDICTION_MODE uv_intra_mode; @@ -3579,9 +3579,9 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_REF_FRAMES; i++) frame_mv[NEWMV][i].as_int = INVALID_MV; for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = LLONG_MAX; + best_pred_rd[i] = INT64_MAX; for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = LLONG_MAX; + best_txfm_rd[i] = INT64_MAX; for (i = 0; i < NB_PARTITIONINGS; i++) { int j, k; @@ -3612,7 +3612,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mdcounts, y_buffer, u_buffer, v_buffer); } - *returnintra = LLONG_MAX; + *returnintra = INT64_MAX; mbmi->ref_frame = INTRA_FRAME; @@ -3640,7 +3640,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { - int64_t this_rd = LLONG_MAX; + int64_t this_rd = INT64_MAX; int disable_skip = 0, skippable = 0; int other_cost = 0; int compmode_cost = 0; @@ -3810,7 +3810,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += uv_intra_distortion; distortion_uv = uv_intra_distortion; } else { - this_rd = LLONG_MAX; + this_rd = INT64_MAX; disable_skip = 1; } } @@ -3887,7 +3887,7 @@ static void rd_pick_inter_mode(VP9_COMP 
*cpi, MACROBLOCK *x, distortion2 += uv_intra_distortion; distortion_uv = uv_intra_distortion; } else { - this_rd = LLONG_MAX; + this_rd = INT64_MAX; disable_skip = 1; } } @@ -3933,7 +3933,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_uv; skippable = skippable && uv_skippable; } else { - this_rd = LLONG_MAX; + this_rd = INT64_MAX; disable_skip = 1; } @@ -3971,7 +3971,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, recon_yoffset, mode_index, frame_mv); - if (this_rd == LLONG_MAX) + if (this_rd == INT64_MAX) continue; } @@ -4158,7 +4158,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (!mode_excluded && this_rd != LLONG_MAX) { + if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; if (this_mode != B_PRED) { @@ -4256,7 +4256,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - if (best_pred_rd[i] == LLONG_MAX) + if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else best_pred_diff[i] = best_rd - best_pred_rd[i]; @@ -4264,7 +4264,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == LLONG_MAX) + if (best_txfm_rd[i] == INT64_MAX) best_txfm_diff[i] = INT_MIN; else best_txfm_diff[i] = best_rd - best_txfm_rd[i]; @@ -4481,7 +4481,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mdcounts[4]; int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; - int64_t best_rd = LLONG_MAX; + int64_t best_rd = INT64_MAX; int64_t best_txfm_rd[NB_TXFM_MODES]; int64_t best_txfm_diff[NB_TXFM_MODES]; int64_t best_pred_diff[NB_PREDICTION_TYPES]; @@ -4491,10 +4491,10 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, unsigned int ref_costs[MAX_REF_FRAMES]; #if CONFIG_COMP_INTERINTRA_PRED int is_best_interintra = 0; - int64_t best_intra16_rd = LLONG_MAX; + int64_t best_intra16_rd = INT64_MAX; int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; #endif - int64_t best_overall_rd = LLONG_MAX; + int64_t best_overall_rd = INT64_MAX; INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0, rate_uv_tokenonly_8x8 = 0; @@ -4511,9 +4511,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = LLONG_MAX; + best_pred_rd[i] = INT64_MAX; for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = LLONG_MAX; + best_txfm_rd[i] = INT64_MAX; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { @@ -4573,7 +4573,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (mode_index = 0; mode_index < MAX_MODES; mode_index += (!switchable_filter_index)) { int mode_excluded = 0; - int64_t this_rd = LLONG_MAX; + int64_t this_rd = INT64_MAX; int disable_skip = 0; int other_cost = 0; int compmode_cost = 0; @@ -4744,7 +4744,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, &rate_uv, &distortion_uv, &mode_excluded, &disable_skip, recon_yoffset, mode_index, frame_mv); - if (this_rd == LLONG_MAX) + if (this_rd == INT64_MAX) continue; } @@ -4908,7 +4908,7 @@ static int64_t 
vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (!mode_excluded && this_rd != LLONG_MAX) { + if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; if (this_mode != B_PRED) { @@ -4982,7 +4982,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - if (best_pred_rd[i] == LLONG_MAX) + if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else best_pred_diff[i] = best_rd - best_pred_rd[i]; @@ -4990,7 +4990,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (!x->skip) { for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == LLONG_MAX) + if (best_txfm_rd[i] == INT64_MAX) best_txfm_diff[i] = INT_MIN; else best_txfm_diff[i] = best_rd - best_txfm_rd[i]; diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h index 218bca773..f04c61c04 100644 --- a/vpx/vpx_integer.h +++ b/vpx/vpx_integer.h @@ -27,6 +27,7 @@ typedef unsigned int uint32_t; #if (defined(_MSC_VER) && (_MSC_VER < 1600)) typedef signed __int64 int64_t; typedef unsigned __int64 uint64_t; +#define INT64_MAX _I64_MAX #endif #ifndef _UINTPTR_T_DEFINED diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h deleted file mode 100644 index 4365213f2..000000000 --- a/vpx_ports/vpxtypes.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
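/* [Editor's sketch -- not part of the patch.]  The vpx/vpx_integer.h hunk
 * above makes INT64_MAX available on pre-C99 MSVC (_MSC_VER < 1600), which
 * ships no <stdint.h>, and vpx_ports/vpxtypes.h, whose deletion begins
 * here, becomes redundant.  A minimal sketch of the same fallback pattern,
 * assuming only that _I64_MAX is MSVC's <limits.h> macro for the largest
 * __int64: */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef __int64 int64_t;      /* MSVC's built-in 64-bit type */
#ifndef INT64_MAX
#define INT64_MAX _I64_MAX    /* 9223372036854775807 */
#endif
#else
#include <stdint.h>           /* C99 supplies int64_t and INT64_MAX */
#endif
/* [End editor's sketch.] */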
- */ - - -#ifndef __VPXTYPES_H__ -#define __VPXTYPES_H__ - -#include "vpx_config.h" - -// #include -#ifdef _MSC_VER -# include -typedef SSIZE_T ssize_t; -#endif - -#if defined(HAVE_STDINT_H) && HAVE_STDINT_H -/* C99 types are preferred to vpx integer types */ -# include -#endif - -/*!\defgroup basetypes Base Types - @{*/ -#if !defined(HAVE_STDINT_H) && !defined(INT_T_DEFINED) -# ifdef STRICTTYPES -typedef signed char int8_t; -typedef signed short int16_t; -typedef signed int int32_t; -# else -typedef char int8_t; -typedef short int16_t; -typedef int int32_t; -# endif -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; -#endif - -typedef int8_t vpxs8; -typedef uint8_t vpxu8; -typedef int16_t vpxs16; -typedef uint16_t vpxu16; -typedef int32_t vpxs32; -typedef uint32_t vpxu32; -typedef int32_t vpxbool; - -enum {vpxfalse, vpxtrue}; - -/*!\def OTC - \brief a macro suitable for declaring a constant #vpxtc*/ -/*!\def VPXTC - \brief printf format string suitable for printing an #vpxtc*/ -#ifdef UNICODE -# ifdef NO_WCHAR -# error "no non-wchar support added yet" -# else -# include -typedef wchar_t vpxtc; -# define OTC(str) L ## str -# define VPXTC "ls" -# endif /*NO_WCHAR*/ -#else -typedef char vpxtc; -# define OTC(str) (vpxtc*)str -# define VPXTC "s" -#endif /*UNICODE*/ -/*@} end - base types*/ - -/*!\addtogroup basetypes - @{*/ -/*!\def VPX64 - \brief printf format string suitable for printing an #vpxs64*/ -#if defined(HAVE_STDINT_H) -# define VPX64 PRId64 -typedef int64_t vpxs64; -#elif defined(HASLONGLONG) -# undef PRId64 -# define PRId64 "lld" -# define VPX64 PRId64 -typedef long long vpxs64; -#elif defined(WIN32) || defined(_WIN32_WCE) -# undef PRId64 -# define PRId64 "I64d" -# define VPX64 PRId64 -typedef __int64 vpxs64; -typedef unsigned __int64 vpxu64; -#elif defined(__uClinux__) && defined(CHIP_DM642) -# include -# undef PRId64 -# define PRId64 "lld" -# define VPX64 PRId64 -typedef long vpxs64; -#else -# error "64 bit integer type undefined for this platform!" -#endif -#if !defined(HAVE_STDINT_H) && !defined(INT_T_DEFINED) -typedef vpxs64 int64_t; -typedef vpxu64 uint64_t; -#endif -/*!@} end - base types*/ - -/*!\ingroup basetypes - \brief Common return type*/ -typedef enum { - VPX_NOT_FOUND = -404, - VPX_BUFFER_EMPTY = -202, - VPX_BUFFER_FULL = -201, - - VPX_CONNREFUSED = -102, - VPX_TIMEDOUT = -101, - VPX_WOULDBLOCK = -100, - - VPX_NET_ERROR = -9, - VPX_INVALID_VERSION = -8, - VPX_INPROGRESS = -7, - VPX_NOT_SUPP = -6, - VPX_NO_MEM = -3, - VPX_INVALID_PARAMS = -2, - VPX_ERROR = -1, - VPX_OK = 0, - VPX_DONE = 1 -} vpxsc; - -#if defined(WIN32) || defined(_WIN32_WCE) -# define DLLIMPORT __declspec(dllimport) -# define DLLEXPORT __declspec(dllexport) -# define DLLLOCAL -#elif defined(LINUX) -# define DLLIMPORT -/*visibility attribute support is available in 3.4 and later. - see: http:// gcc.gnu.org/wiki/Visibility for more info*/ -# if defined(__GNUC__) && ((__GNUC__<<16|(__GNUC_MINOR__&0xff)) >= (3<<16|4)) -# define GCC_HASCLASSVISIBILITY -# endif /*defined(__GNUC__) && __GNUC_PREREQ(3,4)*/ -# ifdef GCC_HASCLASSVISIBILITY -# define DLLEXPORT __attribute__ ((visibility("default"))) -# define DLLLOCAL __attribute__ ((visibility("hidden"))) -# else -# define DLLEXPORT -# define DLLLOCAL -# endif /*GCC_HASCLASSVISIBILITY*/ -#endif /*platform ifdefs*/ - -#endif /*__VPXTYPES_H__*/ - -#undef VPXAPI -/*!\def VPXAPI - \brief library calling convention/storage class attributes. 
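/* [Editor's sketch -- not part of the patch.]  The VPXAPI/DLLEXPORT
 * machinery in the remainder of this deleted header implemented the usual
 * shared-library import/export shim.  A condensed form of that pattern
 * (macro names are illustrative, not part of the vpx API):
 */
#if defined(_WIN32)
#define MYLIB_EXPORT __declspec(dllexport)  /* used while building the DLL */
#define MYLIB_IMPORT __declspec(dllimport)  /* used by DLL consumers */
#elif defined(__GNUC__) && (__GNUC__ >= 4)
#define MYLIB_EXPORT __attribute__((visibility("default")))
#define MYLIB_IMPORT
#else
#define MYLIB_EXPORT
#define MYLIB_IMPORT
#endif
/* [End editor's sketch.] */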
-
- Specifies whether the function is imported through a dll
- or is from a static library.*/
-#ifdef VPXDLL
-# ifdef VPXDLLEXPORT
-# define VPXAPI DLLEXPORT
-# else
-# define VPXAPI DLLIMPORT
-# endif /*VPXDLLEXPORT*/
-#else
-# define VPXAPI
-#endif /*VPXDLL*/

From 9bf73f46f9ce98be0f62d5f858be3e2100ddae5d Mon Sep 17 00:00:00 2001
From: Yaowu Xu
Date: Mon, 14 Jan 2013 18:03:34 -0800
Subject: [PATCH 77/77] Fix a number of issues that cause failures during
 master Jenkins verification process

Change-Id: I3722b8753eaf39f99b45979ce407a8ea0bea0b89
---
 .../x86/vp9_subpel_variance_impl_sse2.asm  | 645 ++++++++++++++++++
 vp9/encoder/vp9_rdopt.c                    |  16 +-
 vp9/encoder/x86/vp9_variance_impl_sse2.asm | 606 ----------------
 vp9/vp9_common.mk                          |   5 +-
 4 files changed, 656 insertions(+), 616 deletions(-)
 create mode 100644 vp9/common/x86/vp9_subpel_variance_impl_sse2.asm

diff --git a/vp9/common/x86/vp9_subpel_variance_impl_sse2.asm b/vp9/common/x86/vp9_subpel_variance_impl_sse2.asm
new file mode 100644
index 000000000..8a2a471f5
--- /dev/null
+++ b/vp9/common/x86/vp9_subpel_variance_impl_sse2.asm
@@ -0,0 +1,645 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define xmm_filter_shift 7
+
+;void vp9_filter_block2d_bil_var_sse2
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; int xoffset,
+; int yoffset,
+; int *sum,
+; unsigned int *sumsquared;;
+;
+;)
+global sym(vp9_filter_block2d_bil_var_sse2) PRIVATE
+sym(vp9_filter_block2d_bil_var_sse2):
+    push rbp
+    mov rbp, rsp
+    SHADOW_ARGS_TO_STACK 9
+    SAVE_XMM 7
+    GET_GOT rbx
+    push rsi
+    push rdi
+    push rbx
+    ; end prolog
+
+    pxor xmm6, xmm6                 ;
+    pxor xmm7, xmm7                 ;
+
+    lea rsi, [GLOBAL(xmm_bi_rd)]    ; rounding
+    movdqa xmm4, XMMWORD PTR [rsi]
+
+    lea rcx, [GLOBAL(bilinear_filters_sse2)]
+    movsxd rax, dword ptr arg(5)    ; xoffset
+
+    cmp rax, 0                      ; skip first_pass filter if xoffset=0
+    je filter_block2d_bil_var_sse2_sp_only
+
+    shl rax, 5                      ; point to filter coeff with xoffset
+    lea rax, [rax + rcx]            ; HFilter
+
+    movsxd rdx, dword ptr arg(6)    ; yoffset
+
+    cmp rdx, 0                      ; skip second_pass filter if yoffset=0
+    je filter_block2d_bil_var_sse2_fp_only
+
+    shl rdx, 5
+    lea rdx, [rdx + rcx]            ; VFilter
+
+    mov rsi, arg(0)                 ;ref_ptr
+    mov rdi, arg(2)                 ;src_ptr
+    movsxd rcx, dword ptr arg(4)    ;Height
+
+    pxor xmm0, xmm0                 ;
+    movq xmm1, QWORD PTR [rsi]      ;
+    movq xmm3, QWORD PTR [rsi+1]    ;
+
+    punpcklbw xmm1, xmm0            ;
+    pmullw xmm1, [rax]              ;
+    punpcklbw xmm3, xmm0
+    pmullw xmm3, [rax+16]           ;
+
+    paddw xmm1, xmm3                ;
+    paddw xmm1, xmm4                ;
+    psraw xmm1, xmm_filter_shift    ;
+    movdqa xmm5, xmm1
+
+    movsxd rbx, dword ptr arg(1)    ;ref_pixels_per_line
+    lea rsi, [rsi + rbx]
+%if ABI_IS_32BIT=0
+    movsxd r9, dword ptr arg(3)     ;src_pixels_per_line
+%endif
+
+filter_block2d_bil_var_sse2_loop:
+    movq xmm1, QWORD PTR [rsi]      ;
+    movq xmm3, QWORD PTR [rsi+1]    ;
+
+    punpcklbw xmm1, xmm0            ;
+    pmullw xmm1, [rax]              ;
+    punpcklbw xmm3, xmm0            ;
+    pmullw xmm3, [rax+16]           ;
+
+    paddw xmm1, xmm3                ;
+    paddw xmm1, xmm4                ;
+    psraw xmm1, xmm_filter_shift    ;
+
+    movdqa xmm3, xmm5               ;
+    movdqa xmm5, xmm1               ;
+
+    pmullw xmm3, [rdx]              ;
+    pmullw xmm1, [rdx+16]           ;
+    paddw
xmm1, xmm3 ; + paddw xmm1, xmm4 ; + psraw xmm1, xmm_filter_shift ; + + movq xmm3, QWORD PTR [rdi] ; + punpcklbw xmm3, xmm0 ; + + psubw xmm1, xmm3 ; + paddw xmm6, xmm1 ; + + pmaddwd xmm1, xmm1 ; + paddd xmm7, xmm1 ; + + lea rsi, [rsi + rbx] ;ref_pixels_per_line +%if ABI_IS_32BIT + add rdi, dword ptr arg(3) ;src_pixels_per_line +%else + lea rdi, [rdi + r9] +%endif + + sub rcx, 1 ; + jnz filter_block2d_bil_var_sse2_loop ; + + jmp filter_block2d_bil_variance + +filter_block2d_bil_var_sse2_sp_only: + movsxd rdx, dword ptr arg(6) ; yoffset + + cmp rdx, 0 ; skip all if both xoffset=0 and yoffset=0 + je filter_block2d_bil_var_sse2_full_pixel + + shl rdx, 5 + lea rdx, [rdx + rcx] ; VFilter + + mov rsi, arg(0) ;ref_ptr + mov rdi, arg(2) ;src_ptr + movsxd rcx, dword ptr arg(4) ;Height + movsxd rax, dword ptr arg(1) ;ref_pixels_per_line + + pxor xmm0, xmm0 ; + movq xmm1, QWORD PTR [rsi] ; + punpcklbw xmm1, xmm0 ; + + movsxd rbx, dword ptr arg(3) ;src_pixels_per_line + lea rsi, [rsi + rax] + +filter_block2d_bil_sp_only_loop: + movq xmm3, QWORD PTR [rsi] ; + punpcklbw xmm3, xmm0 ; + movdqa xmm5, xmm3 + + pmullw xmm1, [rdx] ; + pmullw xmm3, [rdx+16] ; + paddw xmm1, xmm3 ; + paddw xmm1, xmm4 ; + psraw xmm1, xmm_filter_shift ; + + movq xmm3, QWORD PTR [rdi] ; + punpcklbw xmm3, xmm0 ; + + psubw xmm1, xmm3 ; + paddw xmm6, xmm1 ; + + pmaddwd xmm1, xmm1 ; + paddd xmm7, xmm1 ; + + movdqa xmm1, xmm5 ; + lea rsi, [rsi + rax] ;ref_pixels_per_line + lea rdi, [rdi + rbx] ;src_pixels_per_line + + sub rcx, 1 ; + jnz filter_block2d_bil_sp_only_loop ; + + jmp filter_block2d_bil_variance + +filter_block2d_bil_var_sse2_full_pixel: + mov rsi, arg(0) ;ref_ptr + mov rdi, arg(2) ;src_ptr + movsxd rcx, dword ptr arg(4) ;Height + movsxd rax, dword ptr arg(1) ;ref_pixels_per_line + movsxd rbx, dword ptr arg(3) ;src_pixels_per_line + pxor xmm0, xmm0 ; + +filter_block2d_bil_full_pixel_loop: + movq xmm1, QWORD PTR [rsi] ; + punpcklbw xmm1, xmm0 ; + + movq xmm2, QWORD PTR [rdi] ; + punpcklbw xmm2, xmm0 ; + + psubw xmm1, xmm2 ; + paddw xmm6, xmm1 ; + + pmaddwd xmm1, xmm1 ; + paddd xmm7, xmm1 ; + + lea rsi, [rsi + rax] ;ref_pixels_per_line + lea rdi, [rdi + rbx] ;src_pixels_per_line + + sub rcx, 1 ; + jnz filter_block2d_bil_full_pixel_loop ; + + jmp filter_block2d_bil_variance + +filter_block2d_bil_var_sse2_fp_only: + mov rsi, arg(0) ;ref_ptr + mov rdi, arg(2) ;src_ptr + movsxd rcx, dword ptr arg(4) ;Height + movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line + + pxor xmm0, xmm0 ; + movsxd rbx, dword ptr arg(3) ;src_pixels_per_line + +filter_block2d_bil_fp_only_loop: + movq xmm1, QWORD PTR [rsi] ; + movq xmm3, QWORD PTR [rsi+1] ; + + punpcklbw xmm1, xmm0 ; + pmullw xmm1, [rax] ; + punpcklbw xmm3, xmm0 ; + pmullw xmm3, [rax+16] ; + + paddw xmm1, xmm3 ; + paddw xmm1, xmm4 ; + psraw xmm1, xmm_filter_shift ; + + movq xmm3, QWORD PTR [rdi] ; + punpcklbw xmm3, xmm0 ; + + psubw xmm1, xmm3 ; + paddw xmm6, xmm1 ; + + pmaddwd xmm1, xmm1 ; + paddd xmm7, xmm1 ; + lea rsi, [rsi + rdx] + lea rdi, [rdi + rbx] ;src_pixels_per_line + + sub rcx, 1 ; + jnz filter_block2d_bil_fp_only_loop ; + + jmp filter_block2d_bil_variance + +filter_block2d_bil_variance: + movdq2q mm6, xmm6 ; + movdq2q mm7, xmm7 ; + + psrldq xmm6, 8 + psrldq xmm7, 8 + + movdq2q mm2, xmm6 + movdq2q mm3, xmm7 + + paddw mm6, mm2 + paddd mm7, mm3 + + pxor mm3, mm3 ; + pxor mm2, mm2 ; + + punpcklwd mm2, mm6 ; + punpckhwd mm3, mm6 ; + + paddd mm2, mm3 ; + movq mm6, mm2 ; + + psrlq mm6, 32 ; + paddd mm2, mm6 ; + + psrad mm2, 16 ; + movq mm4, mm7 ; + + psrlq mm4, 32 ; + paddd mm4, mm7 ; + + 
mov rsi, arg(7) ; sum + mov rdi, arg(8) ; sumsquared + + movd [rsi], mm2 ; xsum + movd [rdi], mm4 ; xxsum + + ; begin epilog + pop rbx + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + + +;void vp9_half_horiz_vert_variance16x_h_sse2 +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned int Height, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE +sym(vp9_half_horiz_vert_variance16x_h_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + pxor xmm6, xmm6 ; error accumulator + pxor xmm7, xmm7 ; sse eaccumulator + mov rsi, arg(0) ;ref_ptr ; + + mov rdi, arg(2) ;src_ptr ; + movsxd rcx, dword ptr arg(4) ;Height ; + movsxd rax, dword ptr arg(1) ;ref_pixels_per_line + movsxd rdx, dword ptr arg(3) ;src_pixels_per_line + + pxor xmm0, xmm0 ; + + movdqu xmm5, XMMWORD PTR [rsi] + movdqu xmm3, XMMWORD PTR [rsi+1] + pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1 + + lea rsi, [rsi + rax] + +.half_horiz_vert_variance16x_h_1: + movdqu xmm1, XMMWORD PTR [rsi] ; + movdqu xmm2, XMMWORD PTR [rsi+1] ; + pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1 + + pavgb xmm5, xmm1 ; xmm = vertical average of the above + + movdqa xmm4, xmm5 + punpcklbw xmm5, xmm0 ; xmm5 = words of above + punpckhbw xmm4, xmm0 + + movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 + punpcklbw xmm3, xmm0 ; xmm3 = words of above + psubw xmm5, xmm3 ; xmm5 -= xmm3 + + movq xmm3, QWORD PTR [rdi+8] + punpcklbw xmm3, xmm0 + psubw xmm4, xmm3 + + paddw xmm6, xmm5 ; xmm6 += accumulated column differences + paddw xmm6, xmm4 + pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 + pmaddwd xmm4, xmm4 + paddd xmm7, xmm5 ; xmm7 += accumulated square column differences + paddd xmm7, xmm4 + + movdqa xmm5, xmm1 ; save xmm1 for use on the next row + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + + sub rcx, 1 ; + jnz .half_horiz_vert_variance16x_h_1 ; + + pxor xmm1, xmm1 + pxor xmm5, xmm5 + + punpcklwd xmm0, xmm6 + punpckhwd xmm1, xmm6 + psrad xmm0, 16 + psrad xmm1, 16 + paddd xmm0, xmm1 + movdqa xmm1, xmm0 + + movdqa xmm6, xmm7 + punpckldq xmm6, xmm5 + punpckhdq xmm7, xmm5 + paddd xmm6, xmm7 + + punpckldq xmm0, xmm5 + punpckhdq xmm1, xmm5 + paddd xmm0, xmm1 + + movdqa xmm7, xmm6 + movdqa xmm1, xmm0 + + psrldq xmm7, 8 + psrldq xmm1, 8 + + paddd xmm6, xmm7 + paddd xmm0, xmm1 + + mov rsi, arg(5) ;[Sum] + mov rdi, arg(6) ;[SSE] + + movd [rsi], xmm0 + movd [rdi], xmm6 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp9_half_vert_variance16x_h_sse2 +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned int Height, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE +sym(vp9_half_vert_variance16x_h_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + pxor xmm6, xmm6 ; error accumulator + pxor xmm7, xmm7 ; sse eaccumulator + mov rsi, arg(0) ;ref_ptr + + mov rdi, arg(2) ;src_ptr + movsxd rcx, dword ptr arg(4) ;Height + movsxd rax, dword ptr arg(1) ;ref_pixels_per_line + movsxd rdx, dword ptr arg(3) ;src_pixels_per_line + + movdqu xmm5, XMMWORD PTR [rsi] + lea rsi, [rsi + rax ] + pxor xmm0, xmm0 + +.half_vert_variance16x_h_1: + movdqu xmm3, XMMWORD PTR [rsi] + + pavgb xmm5, xmm3 ; xmm5 = 
avg(xmm1,xmm3) + movdqa xmm4, xmm5 + punpcklbw xmm5, xmm0 + punpckhbw xmm4, xmm0 + + movq xmm2, QWORD PTR [rdi] + punpcklbw xmm2, xmm0 + psubw xmm5, xmm2 + movq xmm2, QWORD PTR [rdi+8] + punpcklbw xmm2, xmm0 + psubw xmm4, xmm2 + + paddw xmm6, xmm5 ; xmm6 += accumulated column differences + paddw xmm6, xmm4 + pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 + pmaddwd xmm4, xmm4 + paddd xmm7, xmm5 ; xmm7 += accumulated square column differences + paddd xmm7, xmm4 + + movdqa xmm5, xmm3 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + + sub rcx, 1 + jnz .half_vert_variance16x_h_1 + + pxor xmm1, xmm1 + pxor xmm5, xmm5 + + punpcklwd xmm0, xmm6 + punpckhwd xmm1, xmm6 + psrad xmm0, 16 + psrad xmm1, 16 + paddd xmm0, xmm1 + movdqa xmm1, xmm0 + + movdqa xmm6, xmm7 + punpckldq xmm6, xmm5 + punpckhdq xmm7, xmm5 + paddd xmm6, xmm7 + + punpckldq xmm0, xmm5 + punpckhdq xmm1, xmm5 + paddd xmm0, xmm1 + + movdqa xmm7, xmm6 + movdqa xmm1, xmm0 + + psrldq xmm7, 8 + psrldq xmm1, 8 + + paddd xmm6, xmm7 + paddd xmm0, xmm1 + + mov rsi, arg(5) ;[Sum] + mov rdi, arg(6) ;[SSE] + + movd [rsi], xmm0 + movd [rdi], xmm6 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp9_half_horiz_variance16x_h_sse2 +;( +; unsigned char *ref_ptr, +; int ref_pixels_per_line, +; unsigned char *src_ptr, +; int src_pixels_per_line, +; unsigned int Height, +; int *sum, +; unsigned int *sumsquared +;) +global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE +sym(vp9_half_horiz_variance16x_h_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + SAVE_XMM 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + pxor xmm6, xmm6 ; error accumulator + pxor xmm7, xmm7 ; sse eaccumulator + mov rsi, arg(0) ;ref_ptr ; + + mov rdi, arg(2) ;src_ptr ; + movsxd rcx, dword ptr arg(4) ;Height ; + movsxd rax, dword ptr arg(1) ;ref_pixels_per_line + movsxd rdx, dword ptr arg(3) ;src_pixels_per_line + + pxor xmm0, xmm0 ; + +.half_horiz_variance16x_h_1: + movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15 + movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16 + + pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) + movdqa xmm1, xmm5 + punpcklbw xmm5, xmm0 ; xmm5 = words of above + punpckhbw xmm1, xmm0 + + movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7 + punpcklbw xmm3, xmm0 ; xmm3 = words of above + movq xmm2, QWORD PTR [rdi+8] + punpcklbw xmm2, xmm0 + + psubw xmm5, xmm3 ; xmm5 -= xmm3 + psubw xmm1, xmm2 + paddw xmm6, xmm5 ; xmm6 += accumulated column differences + paddw xmm6, xmm1 + pmaddwd xmm5, xmm5 ; xmm5 *= xmm5 + pmaddwd xmm1, xmm1 + paddd xmm7, xmm5 ; xmm7 += accumulated square column differences + paddd xmm7, xmm1 + + lea rsi, [rsi + rax] + lea rdi, [rdi + rdx] + + sub rcx, 1 ; + jnz .half_horiz_variance16x_h_1 ; + + pxor xmm1, xmm1 + pxor xmm5, xmm5 + + punpcklwd xmm0, xmm6 + punpckhwd xmm1, xmm6 + psrad xmm0, 16 + psrad xmm1, 16 + paddd xmm0, xmm1 + movdqa xmm1, xmm0 + + movdqa xmm6, xmm7 + punpckldq xmm6, xmm5 + punpckhdq xmm7, xmm5 + paddd xmm6, xmm7 + + punpckldq xmm0, xmm5 + punpckhdq xmm1, xmm5 + paddd xmm0, xmm1 + + movdqa xmm7, xmm6 + movdqa xmm1, xmm0 + + psrldq xmm7, 8 + psrldq xmm1, 8 + + paddd xmm6, xmm7 + paddd xmm0, xmm1 + + mov rsi, arg(5) ;[Sum] + mov rdi, arg(6) ;[SSE] + + movd [rsi], xmm0 + movd [rdi], xmm6 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64}; +align 16 +xmm_bi_rd: + times 8 dw 64 +align 16 +bilinear_filters_sse2: + dw 128, 128, 128, 128, 128, 128, 128, 
128, 0, 0, 0, 0, 0, 0, 0, 0 + dw 120, 120, 120, 120, 120, 120, 120, 120, 8, 8, 8, 8, 8, 8, 8, 8 + dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 + dw 104, 104, 104, 104, 104, 104, 104, 104, 24, 24, 24, 24, 24, 24, 24, 24 + dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 + dw 88, 88, 88, 88, 88, 88, 88, 88, 40, 40, 40, 40, 40, 40, 40, 40 + dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 + dw 72, 72, 72, 72, 72, 72, 72, 72, 56, 56, 56, 56, 56, 56, 56, 56 + dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 + dw 56, 56, 56, 56, 56, 56, 56, 56, 72, 72, 72, 72, 72, 72, 72, 72 + dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 + dw 40, 40, 40, 40, 40, 40, 40, 40, 88, 88, 88, 88, 88, 88, 88, 88 + dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 + dw 24, 24, 24, 24, 24, 24, 24, 24, 104, 104, 104, 104, 104, 104, 104, 104 + dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 + dw 8, 8, 8, 8, 8, 8, 8, 8, 120, 120, 120, 120, 120, 120, 120, 120 diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 823476fb3..5e76d9372 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -4287,10 +4287,10 @@ void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, int *returndist) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - int rate_y, rate_uv; - int rate_y_tokenonly, rate_uv_tokenonly; - int dist_y, dist_uv; - int y_skip, uv_skip; + int rate_y = 0, rate_uv; + int rate_y_tokenonly = 0, rate_uv_tokenonly; + int dist_y = 0, dist_uv; + int y_skip = 0, uv_skip; int64_t txfm_cache[NB_TXFM_MODES]; rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -4315,10 +4315,10 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, int *returndist) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - int rate_y, rate_uv; - int rate_y_tokenonly, rate_uv_tokenonly; - int dist_y, dist_uv; - int y_skip, uv_skip; + int rate_y = 0, rate_uv; + int rate_y_tokenonly = 0, rate_uv_tokenonly; + int dist_y = 0, dist_uv; + int y_skip = 0, uv_skip; int64_t txfm_cache[NB_TXFM_MODES]; rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm index 399926900..896dd185d 100644 --- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm +++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm @@ -400,286 +400,6 @@ sym(vp9_get8x8var_sse2): pop rbp ret -;void vp9_filter_block2d_bil_var_sse2 -;( -; unsigned char *ref_ptr, -; int ref_pixels_per_line, -; unsigned char *src_ptr, -; int src_pixels_per_line, -; unsigned int Height, -; int xoffset, -; int yoffset, -; int *sum, -; unsigned int *sumsquared;; -; -;) -global sym(vp9_filter_block2d_bil_var_sse2) PRIVATE -sym(vp9_filter_block2d_bil_var_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - push rbx - ; end prolog - - pxor xmm6, xmm6 ; - pxor xmm7, xmm7 ; - - lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding - movdqa xmm4, XMMWORD PTR [rsi] - - lea rcx, [GLOBAL(bilinear_filters_sse2)] - movsxd rax, dword ptr arg(5) ; xoffset - - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je filter_block2d_bil_var_sse2_sp_only - - shl rax, 5 ; point to filter coeff with xoffset - lea rax, [rax + rcx] ; HFilter - - movsxd rdx, dword ptr arg(6) ; yoffset - - cmp rdx, 0 ; skip second_pass filter if yoffset=0 - je filter_block2d_bil_var_sse2_fp_only - - shl rdx, 5 - lea 
rdx, [rdx + rcx] ; VFilter - - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - - pxor xmm0, xmm0 ; - movq xmm1, QWORD PTR [rsi] ; - movq xmm3, QWORD PTR [rsi+1] ; - - punpcklbw xmm1, xmm0 ; - pmullw xmm1, [rax] ; - punpcklbw xmm3, xmm0 - pmullw xmm3, [rax+16] ; - - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - movdqa xmm5, xmm1 - - movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line - lea rsi, [rsi + rbx] -%if ABI_IS_32BIT=0 - movsxd r9, dword ptr arg(3) ;src_pixels_per_line -%endif - -filter_block2d_bil_var_sse2_loop: - movq xmm1, QWORD PTR [rsi] ; - movq xmm3, QWORD PTR [rsi+1] ; - - punpcklbw xmm1, xmm0 ; - pmullw xmm1, [rax] ; - punpcklbw xmm3, xmm0 ; - pmullw xmm3, [rax+16] ; - - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movdqa xmm3, xmm5 ; - movdqa xmm5, xmm1 ; - - pmullw xmm3, [rdx] ; - pmullw xmm1, [rdx+16] ; - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movq xmm3, QWORD PTR [rdi] ; - punpcklbw xmm3, xmm0 ; - - psubw xmm1, xmm3 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - - lea rsi, [rsi + rbx] ;ref_pixels_per_line -%if ABI_IS_32BIT - add rdi, dword ptr arg(3) ;src_pixels_per_line -%else - lea rdi, [rdi + r9] -%endif - - sub rcx, 1 ; - jnz filter_block2d_bil_var_sse2_loop ; - - jmp filter_block2d_bil_variance - -filter_block2d_bil_var_sse2_sp_only: - movsxd rdx, dword ptr arg(6) ; yoffset - - cmp rdx, 0 ; skip all if both xoffset=0 and yoffset=0 - je filter_block2d_bil_var_sse2_full_pixel - - shl rdx, 5 - lea rdx, [rdx + rcx] ; VFilter - - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; - movq xmm1, QWORD PTR [rsi] ; - punpcklbw xmm1, xmm0 ; - - movsxd rbx, dword ptr arg(3) ;src_pixels_per_line - lea rsi, [rsi + rax] - -filter_block2d_bil_sp_only_loop: - movq xmm3, QWORD PTR [rsi] ; - punpcklbw xmm3, xmm0 ; - movdqa xmm5, xmm3 - - pmullw xmm1, [rdx] ; - pmullw xmm3, [rdx+16] ; - paddw xmm1, xmm3 ; - paddw xmm1, xmm4 ; - psraw xmm1, xmm_filter_shift ; - - movq xmm3, QWORD PTR [rdi] ; - punpcklbw xmm3, xmm0 ; - - psubw xmm1, xmm3 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - - movdqa xmm1, xmm5 ; - lea rsi, [rsi + rax] ;ref_pixels_per_line - lea rdi, [rdi + rbx] ;src_pixels_per_line - - sub rcx, 1 ; - jnz filter_block2d_bil_sp_only_loop ; - - jmp filter_block2d_bil_variance - -filter_block2d_bil_var_sse2_full_pixel: - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rax, dword ptr arg(1) ;ref_pixels_per_line - movsxd rbx, dword ptr arg(3) ;src_pixels_per_line - pxor xmm0, xmm0 ; - -filter_block2d_bil_full_pixel_loop: - movq xmm1, QWORD PTR [rsi] ; - punpcklbw xmm1, xmm0 ; - - movq xmm2, QWORD PTR [rdi] ; - punpcklbw xmm2, xmm0 ; - - psubw xmm1, xmm2 ; - paddw xmm6, xmm1 ; - - pmaddwd xmm1, xmm1 ; - paddd xmm7, xmm1 ; - - lea rsi, [rsi + rax] ;ref_pixels_per_line - lea rdi, [rdi + rbx] ;src_pixels_per_line - - sub rcx, 1 ; - jnz filter_block2d_bil_full_pixel_loop ; - - jmp filter_block2d_bil_variance - -filter_block2d_bil_var_sse2_fp_only: - mov rsi, arg(0) ;ref_ptr - mov rdi, arg(2) ;src_ptr - movsxd rcx, dword ptr arg(4) ;Height - movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line - - pxor xmm0, xmm0 ; - movsxd rbx, dword ptr arg(3) ;src_pixels_per_line - -filter_block2d_bil_fp_only_loop: - movq xmm1, QWORD PTR [rsi] ; - movq xmm3, 
QWORD PTR [rsi+1]       ;
-        punpcklbw       xmm1,           xmm0                    ;
-        pmullw          xmm1,           [rax]                   ;
-        punpcklbw       xmm3,           xmm0                    ;
-        pmullw          xmm3,           [rax+16]                ;
-
-        paddw           xmm1,           xmm3                    ;
-        paddw           xmm1,           xmm4                    ;
-        psraw           xmm1,           xmm_filter_shift        ;
-
-        movq            xmm3,           QWORD PTR [rdi]         ;
-        punpcklbw       xmm3,           xmm0                    ;
-
-        psubw           xmm1,           xmm3                    ;
-        paddw           xmm6,           xmm1                    ;
-
-        pmaddwd         xmm1,           xmm1                    ;
-        paddd           xmm7,           xmm1                    ;
-        lea             rsi,            [rsi + rdx]
-        lea             rdi,            [rdi + rbx]             ;src_pixels_per_line
-
-        sub             rcx,            1                       ;
-        jnz             filter_block2d_bil_fp_only_loop         ;
-
-        jmp             filter_block2d_bil_variance
-
-filter_block2d_bil_variance:
-        movdq2q         mm6,            xmm6                    ;
-        movdq2q         mm7,            xmm7                    ;
-
-        psrldq          xmm6,           8
-        psrldq          xmm7,           8
-
-        movdq2q         mm2,            xmm6
-        movdq2q         mm3,            xmm7
-
-        paddw           mm6,            mm2
-        paddd           mm7,            mm3
-
-        pxor            mm3,            mm3                     ;
-        pxor            mm2,            mm2                     ;
-
-        punpcklwd       mm2,            mm6                     ;
-        punpckhwd       mm3,            mm6                     ;
-
-        paddd           mm2,            mm3                     ;
-        movq            mm6,            mm2                     ;
-
-        psrlq           mm6,            32                      ;
-        paddd           mm2,            mm6                     ;
-
-        psrad           mm2,            16                      ;
-        movq            mm4,            mm7                     ;
-
-        psrlq           mm4,            32                      ;
-        paddd           mm4,            mm7                     ;
-
-        mov             rsi,            arg(7) ; sum
-        mov             rdi,            arg(8) ; sumsquared
-
-        movd            [rsi],          mm2     ; xsum
-        movd            [rdi],          mm4     ; xxsum
-
-        ; begin epilog
-        pop rbx
-        pop rdi
-        pop rsi
-        RESTORE_GOT
-        RESTORE_XMM
-        UNSHADOW_ARGS
-        pop rbp
-        ret
-
-
 ;void vp9_half_horiz_vert_variance8x_h_sse2
 ;(
 ;    unsigned char *ref_ptr,
@@ -802,122 +522,6 @@ sym(vp9_half_horiz_vert_variance8x_h_sse2):
     pop         rbp
     ret

-;void vp9_half_horiz_vert_variance16x_h_sse2
-;(
-;    unsigned char *ref_ptr,
-;    int ref_pixels_per_line,
-;    unsigned char *src_ptr,
-;    int src_pixels_per_line,
-;    unsigned int Height,
-;    int *sum,
-;    unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance16x_h_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 7
-    SAVE_XMM 7
-    GET_GOT     rbx
-    push rsi
-    push rdi
-    ; end prolog
-
-    pxor        xmm6, xmm6                  ;  error accumulator
-    pxor        xmm7, xmm7                  ;  sse eaccumulator
-    mov         rsi, arg(0)                 ;ref_ptr              ;
-
-    mov         rdi, arg(2)                 ;src_ptr              ;
-    movsxd      rcx, dword ptr arg(4)       ;Height               ;
-    movsxd      rax, dword ptr arg(1)       ;ref_pixels_per_line
-    movsxd      rdx, dword ptr arg(3)       ;src_pixels_per_line
-
-    pxor        xmm0, xmm0                  ;
-
-    movdqu      xmm5, XMMWORD PTR [rsi]
-    movdqu      xmm3, XMMWORD PTR [rsi+1]
-    pavgb       xmm5, xmm3                  ;  xmm5 = avg(xmm1,xmm3) horizontal line 1
-
-    lea         rsi, [rsi + rax]
-
-.half_horiz_vert_variance16x_h_1:
-    movdqu      xmm1, XMMWORD PTR [rsi]     ;
-    movdqu      xmm2, XMMWORD PTR [rsi+1]   ;
-    pavgb       xmm1, xmm2                  ;  xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
-    pavgb       xmm5, xmm1                  ;  xmm = vertical average of the above
-
-    movdqa      xmm4, xmm5
-    punpcklbw   xmm5, xmm0                  ;  xmm5 = words of above
-    punpckhbw   xmm4, xmm0
-
-    movq        xmm3, QWORD PTR [rdi]       ;  xmm3 = d0,d1,d2..d7
-    punpcklbw   xmm3, xmm0                  ;  xmm3 = words of above
-    psubw       xmm5, xmm3                  ;  xmm5 -= xmm3
-
-    movq        xmm3, QWORD PTR [rdi+8]
-    punpcklbw   xmm3, xmm0
-    psubw       xmm4, xmm3
-
-    paddw       xmm6, xmm5                  ;  xmm6 += accumulated column differences
-    paddw       xmm6, xmm4
-    pmaddwd     xmm5, xmm5                  ;  xmm5 *= xmm5
-    pmaddwd     xmm4, xmm4
-    paddd       xmm7, xmm5                  ;  xmm7 += accumulated square column differences
-    paddd       xmm7, xmm4
-
-    movdqa      xmm5, xmm1                  ;  save xmm1 for use on the next row
-
-    lea         rsi, [rsi + rax]
-    lea         rdi, [rdi + rdx]
-
-    sub         rcx, 1                      ;
-    jnz         .half_horiz_vert_variance16x_h_1    ;
-
-    pxor        xmm1, xmm1
-    pxor        xmm5, xmm5
-
-    punpcklwd   xmm0, xmm6
-    punpckhwd   xmm1, xmm6
-    psrad       xmm0, 16
-    psrad       xmm1, 16
-    paddd       xmm0, xmm1
-    movdqa      xmm1, xmm0
-
-    movdqa      xmm6, xmm7
-    punpckldq   xmm6, xmm5
-    punpckhdq   xmm7, xmm5
-    paddd       xmm6, xmm7
-
-    punpckldq   xmm0, xmm5
-    punpckhdq   xmm1, xmm5
-    paddd       xmm0, xmm1
-
-    movdqa      xmm7, xmm6
-    movdqa      xmm1, xmm0
-
-    psrldq      xmm7, 8
-    psrldq      xmm1, 8
-
-    paddd       xmm6, xmm7
-    paddd       xmm0, xmm1
-
-    mov         rsi, arg(5)                 ;[Sum]
-    mov         rdi, arg(6)                 ;[SSE]
-
-    movd        [rsi], xmm0
-    movd        [rdi], xmm6
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop rbp
-    ret
-
-
 ;void vp9_half_vert_variance8x_h_sse2
 ;(
 ;    unsigned char *ref_ptr,
@@ -1025,113 +629,6 @@ sym(vp9_half_vert_variance8x_h_sse2):
     pop         rbp
     ret

-;void vp9_half_vert_variance16x_h_sse2
-;(
-;    unsigned char *ref_ptr,
-;    int ref_pixels_per_line,
-;    unsigned char *src_ptr,
-;    int src_pixels_per_line,
-;    unsigned int Height,
-;    int *sum,
-;    unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance16x_h_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 7
-    SAVE_XMM 7
-    GET_GOT     rbx
-    push rsi
-    push rdi
-    ; end prolog
-
-    pxor        xmm6, xmm6                  ;  error accumulator
-    pxor        xmm7, xmm7                  ;  sse eaccumulator
-    mov         rsi, arg(0)                 ;ref_ptr
-
-    mov         rdi, arg(2)                 ;src_ptr
-    movsxd      rcx, dword ptr arg(4)       ;Height
-    movsxd      rax, dword ptr arg(1)       ;ref_pixels_per_line
-    movsxd      rdx, dword ptr arg(3)       ;src_pixels_per_line
-
-    movdqu      xmm5, XMMWORD PTR [rsi]
-    lea         rsi, [rsi + rax ]
-    pxor        xmm0, xmm0
-
-.half_vert_variance16x_h_1:
-    movdqu      xmm3, XMMWORD PTR [rsi]
-
-    pavgb       xmm5, xmm3                  ;  xmm5 = avg(xmm1,xmm3)
-    movdqa      xmm4, xmm5
-    punpcklbw   xmm5, xmm0
-    punpckhbw   xmm4, xmm0
-
-    movq        xmm2, QWORD PTR [rdi]
-    punpcklbw   xmm2, xmm0
-    psubw       xmm5, xmm2
-    movq        xmm2, QWORD PTR [rdi+8]
-    punpcklbw   xmm2, xmm0
-    psubw       xmm4, xmm2
-
-    paddw       xmm6, xmm5                  ;  xmm6 += accumulated column differences
-    paddw       xmm6, xmm4
-    pmaddwd     xmm5, xmm5                  ;  xmm5 *= xmm5
-    pmaddwd     xmm4, xmm4
-    paddd       xmm7, xmm5                  ;  xmm7 += accumulated square column differences
-    paddd       xmm7, xmm4
-
-    movdqa      xmm5, xmm3
-
-    lea         rsi, [rsi + rax]
-    lea         rdi, [rdi + rdx]
-
-    sub         rcx, 1
-    jnz         .half_vert_variance16x_h_1
-
-    pxor        xmm1, xmm1
-    pxor        xmm5, xmm5
-
-    punpcklwd   xmm0, xmm6
-    punpckhwd   xmm1, xmm6
-    psrad       xmm0, 16
-    psrad       xmm1, 16
-    paddd       xmm0, xmm1
-    movdqa      xmm1, xmm0
-
-    movdqa      xmm6, xmm7
-    punpckldq   xmm6, xmm5
-    punpckhdq   xmm7, xmm5
-    paddd       xmm6, xmm7
-
-    punpckldq   xmm0, xmm5
-    punpckhdq   xmm1, xmm5
-    paddd       xmm0, xmm1
-
-    movdqa      xmm7, xmm6
-    movdqa      xmm1, xmm0
-
-    psrldq      xmm7, 8
-    psrldq      xmm1, 8
-
-    paddd       xmm6, xmm7
-    paddd       xmm0, xmm1
-
-    mov         rsi, arg(5)                 ;[Sum]
-    mov         rdi, arg(6)                 ;[SSE]
-
-    movd        [rsi], xmm0
-    movd        [rdi], xmm6
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop rbp
-    ret
-

 ;void vp9_half_horiz_variance8x_h_sse2
 ;(
@@ -1238,109 +735,6 @@ sym(vp9_half_horiz_variance8x_h_sse2):
     pop         rbp
     ret

-;void vp9_half_horiz_variance16x_h_sse2
-;(
-;    unsigned char *ref_ptr,
-;    int ref_pixels_per_line,
-;    unsigned char *src_ptr,
-;    int src_pixels_per_line,
-;    unsigned int Height,
-;    int *sum,
-;    unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance16x_h_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 7
-    SAVE_XMM 7
-    GET_GOT     rbx
-    push rsi
-    push rdi
-    ; end prolog
-
-    pxor        xmm6, xmm6                  ;  error accumulator
-    pxor        xmm7, xmm7                  ;  sse eaccumulator
-    mov         rsi, arg(0)                 ;ref_ptr              ;
-
-    mov         rdi, arg(2)                 ;src_ptr              ;
-    movsxd      rcx, dword ptr arg(4)       ;Height               ;
-    movsxd      rax, dword ptr arg(1)       ;ref_pixels_per_line
-    movsxd      rdx, dword ptr arg(3)       ;src_pixels_per_line
-
-    pxor        xmm0, xmm0                  ;
-
-.half_horiz_variance16x_h_1:
-    movdqu      xmm5, XMMWORD PTR [rsi]     ;  xmm5 = s0,s1,s2..s15
-    movdqu      xmm3, XMMWORD PTR [rsi+1]   ;  xmm3 = s1,s2,s3..s16
-
-    pavgb       xmm5, xmm3                  ;  xmm5 = avg(xmm1,xmm3)
-    movdqa      xmm1, xmm5
-    punpcklbw   xmm5, xmm0                  ;  xmm5 = words of above
-    punpckhbw   xmm1, xmm0
-
-    movq        xmm3, QWORD PTR [rdi]       ;  xmm3 = d0,d1,d2..d7
-    punpcklbw   xmm3, xmm0                  ;  xmm3 = words of above
-    movq        xmm2, QWORD PTR [rdi+8]
-    punpcklbw   xmm2, xmm0
-
-    psubw       xmm5, xmm3                  ;  xmm5 -= xmm3
-    psubw       xmm1, xmm2
-    paddw       xmm6, xmm5                  ;  xmm6 += accumulated column differences
-    paddw       xmm6, xmm1
-    pmaddwd     xmm5, xmm5                  ;  xmm5 *= xmm5
-    pmaddwd     xmm1, xmm1
-    paddd       xmm7, xmm5                  ;  xmm7 += accumulated square column differences
-    paddd       xmm7, xmm1
-
-    lea         rsi, [rsi + rax]
-    lea         rdi, [rdi + rdx]
-
-    sub         rcx, 1                      ;
-    jnz         .half_horiz_variance16x_h_1 ;
-
-    pxor        xmm1, xmm1
-    pxor        xmm5, xmm5
-
-    punpcklwd   xmm0, xmm6
-    punpckhwd   xmm1, xmm6
-    psrad       xmm0, 16
-    psrad       xmm1, 16
-    paddd       xmm0, xmm1
-    movdqa      xmm1, xmm0
-
-    movdqa      xmm6, xmm7
-    punpckldq   xmm6, xmm5
-    punpckhdq   xmm7, xmm5
-    paddd       xmm6, xmm7
-
-    punpckldq   xmm0, xmm5
-    punpckhdq   xmm1, xmm5
-    paddd       xmm0, xmm1
-
-    movdqa      xmm7, xmm6
-    movdqa      xmm1, xmm0
-
-    psrldq      xmm7, 8
-    psrldq      xmm1, 8
-
-    paddd       xmm6, xmm7
-    paddd       xmm0, xmm1
-
-    mov         rsi, arg(5)                 ;[Sum]
-    mov         rdi, arg(6)                 ;[SSE]
-
-    movd        [rsi], xmm0
-    movd        [rdi], xmm6
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop rbp
-    ret
-
 SECTION_RODATA
 ;    short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64};
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index a1c284a27..0d208e9a3 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -91,11 +91,12 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_subpixel_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idctllm_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_wrapper_sse2.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpel_variance_impl_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_variance_sse2.c
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_ssse3.asm
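For readers tracking what the hunks above delete: the removed 16-pixel-wide half-pel
kernels all follow one pattern — average the reference with a neighbouring pixel or row
(`pavgb`), subtract the source, and accumulate a signed difference sum (xmm6) and a sum
of squared differences (xmm7), written out through the `sum`/`sumsquared` pointers.
Below is a minimal C sketch of the horizontal case. It is not part of this patch and the
helper name `half_horiz_variance16_c` is hypothetical; it assumes `pavgb`'s rounded
average, `(a + b + 1) >> 1`, and the argument layout documented in the asm headers.

```c
#include <stdio.h>

/* Hypothetical C model of the removed half-horizontal 16-wide kernel:
 * average each reference pixel with its right neighbour, then accumulate
 * the signed difference sum and the sum of squared differences vs. src. */
static void half_horiz_variance16_c(const unsigned char *ref_ptr,
                                    int ref_pixels_per_line,
                                    const unsigned char *src_ptr,
                                    int src_pixels_per_line,
                                    unsigned int height,
                                    int *sum, unsigned int *sumsquared) {
  int xsum = 0;
  unsigned int xxsum = 0;
  unsigned int i;
  int j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < 16; ++j) {
      /* pavgb computes the rounded average (a + b + 1) >> 1 */
      const int avg = (ref_ptr[j] + ref_ptr[j + 1] + 1) >> 1;
      const int diff = avg - src_ptr[j];
      xsum += diff;                          /* paddw xmm6 accumulation  */
      xxsum += (unsigned int)(diff * diff);  /* pmaddwd/paddd xmm7 path  */
    }
    ref_ptr += ref_pixels_per_line;
    src_ptr += src_pixels_per_line;
  }
  *sum = xsum;         /* stored through arg(5) in the asm */
  *sumsquared = xxsum; /* stored through arg(6) in the asm */
}

int main(void) {
  unsigned char ref[17 * 16], src[16 * 16];
  unsigned int sse;
  int sum, i;

  for (i = 0; i < 17 * 16; ++i) ref[i] = (unsigned char)(i & 255);
  for (i = 0; i < 16 * 16; ++i) src[i] = (unsigned char)((i * 3) & 255);

  half_horiz_variance16_c(ref, 17, src, 16, 16, &sum, &sse);
  /* variance is conventionally derived as sse - sum^2 / (16 * height) */
  printf("sum=%d sse=%u var=%u\n", sum, sse,
         sse - (unsigned int)(((long long)sum * sum) >> 8));
  return 0;
}
```

The asm itself only emits the sum/SSE pair; the surviving callers (e.g. the wrappers in
common/x86/vp9_subpixel_variance_sse2.c kept by the vp9_common.mk hunk) combine them
into the final variance.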