Merge remote branch 'internal/upstream-experimental' into HEAD

Conflicts:
	vp8/decoder/detokenize.c
	vp8/decoder/onyxd_if.c
	vp8/vp8_common.mk

Change-Id: Ifca1108186a8bc715da86a44021ee2fa5550b5b8
John Koleszar 2011-08-11 13:01:45 -04:00
commit a16cd74ba1
37 changed files with 457 additions and 607 deletions

View File

@ -2,3 +2,4 @@ Adrian Grange <agrange@google.com>
Johann Koenig <johannkoenig@google.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Tom Finegan <tomfinegan@google.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>

AUTHORS
View File

@ -4,8 +4,11 @@
Aaron Watry <awatry@gmail.com>
Adrian Grange <agrange@google.com>
Alex Converse <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
Fabio Pedretti <fabio.ped@libero.it>
Frank Galligan <fgalligan@google.com>
@ -22,20 +25,29 @@ Jeff Muizelaar <jmuizelaar@mozilla.com>
Jim Bankoski <jimbankoski@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
Makoto Kato <makoto.kt@gmail.com>
Martin Ettl <ettl.martin78@googlemail.com>
Michael Kohler <michaelkohler@live.com>
Mike Hommey <mhommey@mozilla.com>
Mikhal Shemer <mikhal@google.com>
Pascal Massimino <pascal.massimino@gmail.com>
Patrik Westin <patrik.westin@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Philip Jägenstedt <philipj@opera.com>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Ralph Giles <giles@xiph.org>
Ronald S. Bultje <rbultje@google.com>
Scott LaVarnway <slavarnway@google.com>
Stefan Holmer <holmer@google.com>
Taekhyun Kim <takim@nvidia.com>
Tero Rintaluoma <teror@google.com>
Thijs Vermeir <thijsvermeir@gmail.com>
Timothy B. Terriberry <tterribe@xiph.org>
Tom Finegan <tomfinegan@google.com>
Yaowu Xu <yaowu@google.com>

View File

@ -1,3 +1,85 @@
2011-08-02 v0.9.7 "Cayuga"
Our third named release, focused on a faster, higher quality encoder.
- Upgrading:
This release is backwards compatible with Aylesbury (v0.9.5) and
Bali (v0.9.6). Users of older releases should refer to the Upgrading
notes in this document for that release.
- Enhancements:
Stereo 3D format support for vpxenc
Runtime detection of available processor cores.
Allow specifying --end-usage by enum name
vpxdec: test for frame corruption
vpxenc: add quantizer histogram display
vpxenc: add rate histogram display
Set VPX_FRAME_IS_DROPPABLE
update configure for ios sdk 4.3
Avoid text relocations in ARM vp8 decoder
Generate a vpx.pc file for pkg-config.
New ways of passing encoded data between encoder and decoder.
- Speed:
This release includes across-the-board speed improvements to the
encoder. On x86, these measure at approximately 11.5% in Best mode,
21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6).
On ARM Cortex A9 with Neon extensions, real-time encoding of video
telephony content is 35% faster than Bali on single core and 48%
faster on multi-core. On the NVidia Tegra2 platform, real time
encoding is 40% faster than Bali.
Decoder speed was not a priority for this release, but improved
approximately 8.4% on x86.
Reduce motion vector search on alt-ref frame.
Encoder loopfilter running in its own thread
Reworked loopfilter to precalculate more parameters
SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().
Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3.
Removed redundant checks
Reduced structure sizes
utilize preload in ARMv6 MC/LPF/Copy routines
ARM optimized quantization, dfct, variance, subtract
Increase chroma row alignment to 16 bytes.
disable trellis optimization for first pass
Write SSSE3 sub-pixel filter function
Improve SSE2 half-pixel filter functions
Add vp8_sub_pixel_variance16x8_ssse3 function
Reduce unnecessary distortion computation
Use diamond search to replace full search
Preload reference area in sub-pixel motion search (real-time mode)
- Quality:
This release focused primarily on one-pass use cases, including
video conferencing. Low-latency data rate control was significantly
improved, making streaming over bandwidth-constrained links more reliable.
Added support for error concealment, allowing frames to maintain
visual quality in the presence of substantial packet loss.
Add rc_max_intra_bitrate_pct control
Limit size of initial keyframe in one-pass.
Improve framerate adaptation
Improved 1-pass CBR rate control
Improved KF insertion after fades to still.
Improved key frame detection.
Improved activity masking (lower PSNR impact for same SSIM boost)
Improved interaction between GF and ARFs
Adding error-concealment to the decoder.
Adding support for independent partitions
Adjusted rate-distortion constants
- Bug Fixes:
Removed firstpass motion map
Fix parallel make install
Fix multithreaded encoding for 1 MB wide frame
Fixed iwalsh_neon build problems with RVDS4.1
Fix semaphore emulation, spin-wait intrinsics on Windows
Fix build with xcode4 and simplify GLOBAL.
Mark ARM asm objects as allowing a non-executable stack.
Fix vpxenc encoding incorrect webm file header on big endian
2011-03-07 v0.9.6 "Bali"
Our second named release, focused on a faster, higher quality encoder.

View File

@ -336,6 +336,7 @@ ifneq ($(call enabled,DIST-SRCS),)
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/yasm.rules
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
# Include obj_int_extract if we use offsets from asm_*_offsets
DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64) += build/make/obj_int_extract.c

View File

@ -952,6 +952,10 @@ process_common_toolchain() {
# shared objects
enabled gcc && enabled pic && check_add_cflags -fPIC
# Work around longjmp interception on glibc >= 2.11, to improve binary
# compatibility. See http://code.google.com/p/webm/issues/detail?id=166
enabled linux && check_add_cflags -D_FORTIFY_SOURCE=0
# Check for strip utility variant
${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip

configure
View File

@ -40,6 +40,7 @@ Advanced options:
${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses
${toggle_runtime_cpu_detect} runtime cpu detection
${toggle_shared} shared library support
${toggle_static} static library support
${toggle_small} favor smaller size over speed
${toggle_postproc_visualizer} macro block / block level visualizers
@ -153,6 +154,7 @@ enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
enable install_bins
enable install_libs
enable static
enable optimizations
enable fast_unaligned #allow unaligned accesses, if supported by hw
enable md5
@ -258,6 +260,7 @@ CONFIG_LIST="
realtime_only
error_concealment
shared
static
small
postproc_visualizer
os_support
@ -302,6 +305,7 @@ CMDLINE_SELECT="
realtime_only
error_concealment
shared
static
small
postproc_visualizer
experimental

View File

@ -167,8 +167,10 @@ BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=))
# Instantiate linker template for all examples.
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
$(foreach bin,$(BINS-yes),\
$(if $(BUILD_OBJS),$(eval $(bin): $(LIB_PATH)/lib$(CODEC_LIB).a))\
$(if $(BUILD_OBJS),$(eval $(bin):\
$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
$(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
$(call objs,$($(notdir $(bin)).SRCS)) \
-l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\

libs.mk
View File

@ -35,6 +35,7 @@ ifeq ($(CONFIG_VP8_ENCODER),yes)
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h vpx/vp8e.h
CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8e.h include/vpx/vp8cx.h
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
@ -47,6 +48,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h
CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8dx_arm.mk
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
@ -89,6 +91,7 @@ $(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/asm_offsets.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h
CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
@ -100,7 +103,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
endif
CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm.h
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
@ -121,7 +124,7 @@ INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/v
INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.exp)
endif
else
INSTALL-LIBS-yes += $(LIBSUBDIR)/libvpx.a
INSTALL-LIBS-$(CONFIG_STATIC) += $(LIBSUBDIR)/libvpx.a
INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a
endif
@ -177,12 +180,13 @@ endif
else
LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
LIBVPX_SO := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)
LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
$(notdir $(LIBVPX_SO_SYMLINKS))
$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
@ -198,9 +202,18 @@ libvpx.ver: $(call enabled,CODEC_EXPORTS)
$(qexec)echo "local: *; };" >> $@
CLEAN-OBJS += libvpx.ver
$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)): $(DIST_DIR)/$(LIBSUBDIR)/$(LIBVPX_SO)
@echo " [LN] $@"
$(qexec)ln -sf $(LIBVPX_SO) $@
define libvpx_symlink_template
$(1): $(2)
@echo " [LN] $$@"
$(qexec)ln -sf $(LIBVPX_SO) $$@
endef
$(eval $(call libvpx_symlink_template,\
$(addprefix $(BUILD_PFX),$(notdir $(LIBVPX_SO_SYMLINKS))),\
$(BUILD_PFX)$(LIBVPX_SO)))
$(eval $(call libvpx_symlink_template,\
$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)),\
$(DIST_DIR)/$(LIBSUBDIR)/$(LIBVPX_SO)))
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
@ -259,20 +272,20 @@ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
#
ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.S
$(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
grep EQU $< | tr -d '$$\#' $(ADS2GAS) > $@
$(VP8_PREFIX)common/asm_com_offsets.c.S: vp8/common/asm_com_offsets.c
CLEAN-OBJS += asm_com_offsets.asm $(VP8_PREFIX)common/asm_com_offsets.c.S
$(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c
CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.S
$(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
grep EQU $< | tr -d '$$\#' $(ADS2GAS) > $@
$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: vp8/encoder/asm_enc_offsets.c
CLEAN-OBJS += asm_enc_offsets.asm $(VP8_PREFIX)encoder/asm_enc_offsets.c.S
$(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c
CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.S
$(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
grep EQU $< | tr -d '$$\#' $(ADS2GAS) > $@
$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: vp8/decoder/asm_dec_offsets.c
CLEAN-OBJS += asm_dec_offsets.asm $(VP8_PREFIX)decoder/asm_dec_offsets.c.S
$(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c
CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S
else
ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
asm_com_offsets.asm: obj_int_extract

View File

@ -9,6 +9,8 @@
*/
#include "vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx_ports/asm_offsets.h"
#include "vpx_scale/yv12config.h"
@ -25,8 +27,14 @@ DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_b
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS);
END
/* add asserts for any offset that is not supported by assembly code */
/* add asserts for any size that is not supported by assembly code */
#if HAVE_ARMV7
/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */
ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32)
#endif
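
The ct_assert added above turns the NEON border-extension code's assumption of a 32-pixel frame border into a build-time failure instead of a silent runtime bug. As a hedged illustration of how such a compile-time assert can be written in C89 (the real ct_assert macro lives in vpx_ports/asm_offsets.h and may be defined differently), a negative array size is enough:

```c
/* Illustrative sketch only, not the actual ct_assert from vpx_ports/asm_offsets.h.
 * If the condition is false the array size becomes -1 and compilation stops
 * at the line that states the assumption. */
#define CT_ASSERT(name, cond) typedef char ct_assert_##name[(cond) ? 1 : -1];

#define VP8BORDERINPIXELS 32   /* stand-in for the value from yv12config.h */
CT_ASSERT(border_is_32_pixels, VP8BORDERINPIXELS == 32)
```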

View File

@ -211,8 +211,8 @@ extern "C"
// receive a frame's worth of data. Caller can assume that a copy of this frame is made
// and not just a copy of the pointer.
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, int64_t *time_stamp, int64_t *time_end, int flush);
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);

View File

@ -19,7 +19,9 @@
#include "entropy.h"
#include "idct.h"
#include "recon.h"
#if CONFIG_POSTPROC
#include "postproc.h"
#endif
/*#ifdef PACKET_TESTING*/
#include "header.h"
@ -78,7 +80,9 @@ typedef struct VP8_COMMON_RTCD
vp8_recon_rtcd_vtable_t recon;
vp8_subpix_rtcd_vtable_t subpix;
vp8_loopfilter_rtcd_vtable_t loopfilter;
#if CONFIG_POSTPROC
vp8_postproc_rtcd_vtable_t postproc;
#endif
int flags;
#else
int unused;
@ -205,7 +209,9 @@ typedef struct VP8Common
#if CONFIG_MULTITHREAD
int processor_core_count;
#endif
#if CONFIG_POSTPROC
struct postproc_state postproc_state;
#endif
} VP8_COMMON;
#endif

View File

@ -54,8 +54,8 @@ extern "C"
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags);
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
vpx_codec_err_t vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);

View File

@ -13,7 +13,6 @@
#include "vpx_ports/arm.h"
#include "vp8/common/blockd.h"
#include "vp8/common/pragmas.h"
#include "vp8/common/postproc.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/decoder/onyxd_int.h"

View File

@ -83,7 +83,6 @@ static const unsigned char cat6_prob[12] =
{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
#endif
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
/* Clear entropy contexts for Y2 blocks */

View File

@ -244,8 +244,8 @@ vpx_codec_err_t vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, Y
/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
extern void vp8_push_neon(int64_t *store);
extern void vp8_pop_neon(int64_t *store);
#endif
static int get_free_fb (VP8_COMMON *cm)
@ -344,10 +344,10 @@ static void vp8_print_yuv_rec_mb(VP8_COMMON *cm, int mb_row, int mb_col)
}
*/
int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, int64_t time_stamp)
{
#if HAVE_ARMV7
INT64 dx_store_reg[8];
int64_t dx_store_reg[8];
#endif
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
@ -554,9 +554,9 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
#if 0
{
int i;
INT64 earliest_time = pbi->dr[0].time_stamp;
INT64 latest_time = pbi->dr[0].time_stamp;
INT64 time_diff = 0;
int64_t earliest_time = pbi->dr[0].time_stamp;
int64_t latest_time = pbi->dr[0].time_stamp;
int64_t time_diff = 0;
int bytes = 0;
pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
@ -596,7 +596,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
pbi->common.error.setjmp = 0;
return retcode;
}
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags)
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
{
int ret = -1;
VP8D_COMP *pbi = (VP8D_COMP *) ptr;

View File

@ -40,7 +40,7 @@ typedef struct
typedef struct
{
INT64 time_stamp;
int64_t time_stamp;
int size;
} DATARATE;
@ -121,7 +121,7 @@ typedef struct VP8Decompressor
#endif
vp8_reader *mbc;
INT64 last_time_stamp;
int64_t last_time_stamp;
int ready_for_new_data;
DATARATE dr[16];

View File

@ -336,7 +336,7 @@ static unsigned int mb_activity_measure( VP8_COMP *cpi, MACROBLOCK *x,
// Calculate an "average" mb activity value for the frame
#define ACT_MEDIAN 0
static void calc_av_activity( VP8_COMP *cpi, INT64 activity_sum )
static void calc_av_activity( VP8_COMP *cpi, int64_t activity_sum )
{
#if ACT_MEDIAN
// Find median: Simple n^2 algorithm for experimentation
@ -404,9 +404,9 @@ static void calc_activity_index( VP8_COMP *cpi, MACROBLOCK *x )
VP8_COMMON *const cm = & cpi->common;
int mb_row, mb_col;
INT64 act;
INT64 a;
INT64 b;
int64_t act;
int64_t a;
int64_t b;
#if OUTPUT_NORM_ACT_STATS
FILE *f = fopen("norm_act.stt", "a");
@ -470,7 +470,7 @@ static void build_activity_map( VP8_COMP *cpi )
int mb_row, mb_col;
unsigned int mb_activity;
INT64 activity_sum = 0;
int64_t activity_sum = 0;
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
@ -537,15 +537,15 @@ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
x->errorperbit = x->rdmult * 100 /(110 * x->rddiv);
x->errorperbit += (x->errorperbit==0);
#else
INT64 a;
INT64 b;
INT64 act = *(x->mb_activity_ptr);
int64_t a;
int64_t b;
int64_t act = *(x->mb_activity_ptr);
// Apply the masking to the RD multiplier.
a = act + (2*cpi->activity_avg);
b = (2*act) + cpi->activity_avg;
x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
x->rdmult = (unsigned int)(((int64_t)x->rdmult*b + (a>>1))/a);
x->errorperbit = x->rdmult * 100 /(110 * x->rddiv);
x->errorperbit += (x->errorperbit==0);
#endif
@ -1444,18 +1444,18 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x )
#if USE_ACT_INDEX
x->act_zbin_adj = *(x->mb_activity_ptr);
#else
INT64 a;
INT64 b;
INT64 act = *(x->mb_activity_ptr);
int64_t a;
int64_t b;
int64_t act = *(x->mb_activity_ptr);
// Apply the masking to the RD multiplier.
a = act + 4*cpi->activity_avg;
b = 4*act + cpi->activity_avg;
if ( act > cpi->activity_avg )
x->act_zbin_adj = (int)(((INT64)b + (a>>1))/a) - 1;
x->act_zbin_adj = (int)(((int64_t)b + (a>>1))/a) - 1;
else
x->act_zbin_adj = 1 - (int)(((INT64)a + (b>>1))/b);
x->act_zbin_adj = 1 - (int)(((int64_t)a + (b>>1))/b);
#endif
}
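
Written out, the activity-masking arithmetic above blends the macroblock activity act against the frame average cpi->activity_avg. The formulas below are read directly from the code in this hunk, with round() standing for the add-half-then-divide integer rounding it uses:

```latex
\mathrm{rdmult}' = \mathrm{round}\!\left(\mathrm{rdmult}\cdot
    \frac{2\,\mathrm{act} + \mathrm{avg}}{\mathrm{act} + 2\,\mathrm{avg}}\right),
\qquad
\mathrm{act\_zbin\_adj} =
\begin{cases}
  \mathrm{round}\!\left(\dfrac{4\,\mathrm{act} + \mathrm{avg}}
                              {\mathrm{act} + 4\,\mathrm{avg}}\right) - 1,
      & \mathrm{act} > \mathrm{avg},\\[1.5ex]
  1 - \mathrm{round}\!\left(\dfrac{\mathrm{act} + 4\,\mathrm{avg}}
                                  {4\,\mathrm{act} + \mathrm{avg}}\right),
      & \text{otherwise.}
\end{cases}
```

The INT64 to int64_t renaming here, as throughout the commit, is a portability cleanup to the standard fixed-width typedef; the arithmetic itself is unchanged.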

View File

@ -530,8 +530,8 @@ void vp8_first_pass(VP8_COMP *cpi)
YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
int recon_y_stride = lst_yv12->y_stride;
int recon_uv_stride = lst_yv12->uv_stride;
long long intra_error = 0;
long long coded_error = 0;
int64_t intra_error = 0;
int64_t coded_error = 0;
int sum_mvr = 0, sum_mvc = 0;
int sum_mvr_abs = 0, sum_mvc_abs = 0;
@ -620,7 +620,7 @@ void vp8_first_pass(VP8_COMP *cpi)
this_error += intrapenalty;
// Cumulative intra error total
intra_error += (long long)this_error;
intra_error += (int64_t)this_error;
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
@ -757,7 +757,7 @@ void vp8_first_pass(VP8_COMP *cpi)
}
}
coded_error += (long long)this_error;
coded_error += (int64_t)this_error;
// adjust to the next column of macroblocks
x->src.y_buffer += 16;
@ -1219,8 +1219,8 @@ void vp8_init_second_pass(VP8_COMP *cpi)
cpi->twopass.total_coded_error_left = cpi->twopass.total_stats->coded_error;
cpi->twopass.start_tot_err_left = cpi->twopass.total_error_left;
//cpi->twopass.bits_left = (long long)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
//cpi->twopass.bits_left -= (long long)(cpi->twopass.total_stats->count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
//cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
//cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
// each frame can have a different duration, as the frame rate in the source
// isn't guaranteed to be constant. The frame rate prior to the first frame
@ -1230,8 +1230,8 @@ void vp8_init_second_pass(VP8_COMP *cpi)
vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats->count / cpi->twopass.total_stats->duration);
cpi->output_frame_rate = cpi->oxcf.frame_rate;
cpi->twopass.bits_left = (long long)(cpi->twopass.total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (long long)(cpi->twopass.total_stats->duration * two_pass_min_rate / 10000000.0);
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration * two_pass_min_rate / 10000000.0);
cpi->twopass.clip_bits_total = cpi->twopass.bits_left;
// Calculate a minimum intra value to be used in determining the IIratio
@ -2671,16 +2671,16 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int max_bits = frame_max_bits(cpi);
// Maximum bits for the kf group
long long max_grp_bits;
int64_t max_grp_bits;
// Default allocation based on bits left and relative
// complexity of the section
cpi->twopass.kf_group_bits = (long long)( cpi->twopass.bits_left *
cpi->twopass.kf_group_bits = (int64_t)( cpi->twopass.bits_left *
( kf_group_err /
cpi->twopass.modified_error_left ));
// Clip based on maximum per frame rate defined by the user.
max_grp_bits = (long long)max_bits * (long long)cpi->twopass.frames_to_key;
max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key;
if (cpi->twopass.kf_group_bits > max_grp_bits)
cpi->twopass.kf_group_bits = max_grp_bits;
@ -2697,19 +2697,19 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
int high_water_mark = (opt_buffer_lvl +
cpi->oxcf.maximum_buffer_size) >> 1;
long long av_group_bits;
int64_t av_group_bits;
// Av bits per frame * number of frames
av_group_bits = (long long)cpi->av_per_frame_bandwidth *
(long long)cpi->twopass.frames_to_key;
av_group_bits = (int64_t)cpi->av_per_frame_bandwidth *
(int64_t)cpi->twopass.frames_to_key;
// We are at or above the maximum.
if (cpi->buffer_level >= high_water_mark)
{
long long min_group_bits;
int64_t min_group_bits;
min_group_bits = av_group_bits +
(long long)(buffer_lvl -
(int64_t)(buffer_lvl -
high_water_mark);
if (cpi->twopass.kf_group_bits < min_group_bits)
@ -2718,11 +2718,11 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
// We are above optimal but below the maximum
else if (cpi->twopass.kf_group_bits < av_group_bits)
{
long long bits_below_av = av_group_bits -
int64_t bits_below_av = av_group_bits -
cpi->twopass.kf_group_bits;
cpi->twopass.kf_group_bits +=
(long long)((double)bits_below_av *
(int64_t)((double)bits_below_av *
(double)(buffer_lvl - opt_buffer_lvl) /
(double)(high_water_mark - opt_buffer_lvl));
}
@ -3043,8 +3043,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
{
long long clip_bits = (long long)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
long long over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
int64_t clip_bits = (int64_t)(cpi->twopass.total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || // If triggered last time the threshold for triggering again is reduced
((kf_q > cpi->worst_quality) && // Projected Q higher than allowed and ...
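
The allocation above gives the key-frame group a share of the remaining two-pass bit budget proportional to its share of the remaining modified error, then clips it to the per-frame maximum times the group length. In symbols, taken from the code and nothing beyond it:

```latex
\mathrm{kf\_group\_bits} = \min\!\left(
    \mathrm{bits\_left}\cdot
    \frac{\mathrm{kf\_group\_err}}{\mathrm{modified\_error\_left}},\;
    \mathrm{max\_bits}\cdot\mathrm{frames\_to\_key}\right)
```

The buffer-level branches that follow then pull that allocation back up toward the average-rate allocation (av_per_frame_bandwidth times frames_to_key) as the buffer approaches its maximum level.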

View File

@ -15,6 +15,7 @@
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#ifdef ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
@ -342,12 +343,26 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int bestmse = INT_MAX;
int_mv startmv;
int_mv this_mv;
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
int whichdir ;
int thismse;
int y_stride;
#if ARCH_X86 || ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *y;
y_stride = 32;
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
y = xd->y_buf + y_stride + 1;
#else
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
y_stride = d->pre_stride;
#endif
// central mv
bestmv->as_mv.row <<= 3;
@ -355,14 +370,14 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
startmv = *bestmv;
// calculate central point error
bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
@ -374,7 +389,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@ -388,7 +403,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
// go up then down and check error
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
@ -400,7 +415,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@ -423,23 +438,23 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
case 0:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
break;
case 1:
this_mv.as_mv.col += 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
break;
case 2:
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
break;
case 3:
default:
this_mv.as_mv.col += 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
break;
}
@ -458,7 +473,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
// time to check quarter pels.
if (bestmv->as_mv.row < startmv.as_mv.row)
y -= d->pre_stride;
y -= y_stride;
if (bestmv->as_mv.col < startmv.as_mv.col)
y--;
@ -473,12 +488,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col = startmv.as_mv.col - 2;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@ -492,7 +507,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 4;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@ -509,12 +524,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row = startmv.as_mv.row - 2;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
}
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@ -528,7 +543,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 4;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@ -558,12 +573,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
}
}
else
@ -573,12 +588,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
}
}
@ -589,12 +604,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.row & 7)
{
this_mv.as_mv.row -= 2;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
}
break;
@ -604,19 +619,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
if (startmv.as_mv.col & 7)
{
this_mv.as_mv.col -= 2;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
}
break;
case 3:
this_mv.as_mv.col += 2;
this_mv.as_mv.row += 2;
thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
break;
}
@ -633,7 +648,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
return bestmse;
}
int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
@ -643,11 +658,26 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
int bestmse = INT_MAX;
int_mv startmv;
int_mv this_mv;
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
int whichdir ;
int thismse;
int y_stride;
#if ARCH_X86 || ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *y;
y_stride = 32;
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
y = xd->y_buf + y_stride + 1;
#else
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
y_stride = d->pre_stride;
#endif
// central mv
bestmv->as_mv.row <<= 3;
@ -655,14 +685,14 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
startmv = *bestmv;
// calculate central point error
bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
@ -674,7 +704,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
@ -688,7 +718,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
// go up then down and check error
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
@ -700,7 +730,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
}
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
@ -711,8 +741,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
// somewhat strangely not doing all the diagonals for half pel is slower than doing them.
#if 0
// now check 1 more diagonal -
whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
this_mv = startmv;
@ -720,39 +748,28 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
switch (whichdir)
{
case 0:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = (this_mv.row - 8) | 4;
diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
break;
case 1:
this_mv.col += 4;
this_mv.row = (this_mv.row - 8) | 4;
diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
this_mv.as_mv.col += 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
break;
case 2:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row += 4;
diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
break;
case 3:
this_mv.col += 4;
this_mv.row += 4;
diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
default:
this_mv.as_mv.col += 4;
this_mv.as_mv.row += 4;
thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
break;
}
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
}
#else
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
@ -763,44 +780,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
*sse1 = sse;
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
this_mv.as_mv.row = startmv.as_mv.row + 4;
thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
*distortion = thismse;
*sse1 = sse;
}
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
*distortion = thismse;
*sse1 = sse;
}
#endif
return bestmse;
}
@ -866,7 +845,7 @@ int vp8_hex_search
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
int in_what_stride = d->pre_stride;
int br = ref_mv->as_mv.row, bc = ref_mv->as_mv.col;
int br, bc;
int_mv this_mv;
unsigned int bestsad = 0x7fffffff;
unsigned int thissad;
@ -880,6 +859,11 @@ int vp8_hex_search
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
// adjust ref_mv to make sure it is within MV range
vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
br = ref_mv->as_mv.row;
bc = ref_mv->as_mv.col;
// Work out the start point for the search
base_offset = (unsigned char *)(*(d->base_pre) + d->pre);
this_offset = base_offset + (br * (d->pre_stride)) + bc;
@ -1043,8 +1027,8 @@ int vp8_diamond_search_sad
int best_site = 0;
int last_site = 0;
int ref_row = ref_mv->as_mv.row;
int ref_col = ref_mv->as_mv.col;
int ref_row;
int ref_col;
int this_row_offset;
int this_col_offset;
search_site *ss;
@ -1057,8 +1041,10 @@ int vp8_diamond_search_sad
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
ref_row = ref_mv->as_mv.row;
ref_col = ref_mv->as_mv.col;
*num00 = 0;
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
@ -1162,8 +1148,8 @@ int vp8_diamond_search_sadx4
int best_site = 0;
int last_site = 0;
int ref_row = ref_mv->as_mv.row;
int ref_col = ref_mv->as_mv.col;
int ref_row;
int ref_col;
int this_row_offset;
int this_col_offset;
search_site *ss;
@ -1176,6 +1162,9 @@ int vp8_diamond_search_sadx4
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
ref_row = ref_mv->as_mv.row;
ref_col = ref_mv->as_mv.col;
*num00 = 0;
best_mv->as_mv.row = ref_row;
best_mv->as_mv.col = ref_col;
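
Two changes run through this search code. First, on x86 the sub-pixel refinement now copies the reference window around the best integer-pel position into the fixed-stride scratch buffer xd->y_buf (stride 32, 18 rows: the 16-row macroblock plus one guard row above and below for the half-pel taps), so every half- and quarter-pel probe reads a small contiguous block; this is the changelog's "Preload reference area in sub-pixel motion search". Second, the hex and diamond searches now clamp ref_mv into the valid MV range before deriving the start point, as shown in the clamp sketch after the rate-distortion hunks further down. The copy itself is done by vfp->copymem; the sketch below is only an illustration of such a routine, not the libvpx implementation:

```c
#include <string.h>

/* Illustrative only: gather `rows` rows of `cols` reference pixels (arbitrary
 * frame stride) into a contiguous scratch buffer with a fixed stride, as the
 * vfp->copymem call above does before the half/quarter-pel probes. */
static void copy_ref_window(const unsigned char *src, int src_stride,
                            unsigned char *dst, int dst_stride,
                            int rows, int cols)
{
    int r;
    for (r = 0; r < rows; ++r)
        memcpy(dst + r * dst_stride, src + r * src_stride, (size_t)cols);
}
```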

View File

@ -9,6 +9,7 @@
*/
#include "vpx_config.h"
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
#include "vp8/common/systemdependent.h"
@ -24,7 +25,9 @@
#include "segmentation.h"
#include "vp8/common/g_common.h"
#include "vpx_scale/yv12extend.h"
#if CONFIG_POSTPROC
#include "vp8/common/postproc.h"
#endif
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"
@ -2729,6 +2732,8 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
cpi->Source = &cpi->scaled_source;
#endif
}
else
cpi->Source = sd;
}
@ -3340,7 +3345,7 @@ void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
static void update_buffer_level(VP8_COMP *cpi)
{
long long tmp;
int64_t tmp;
/* Update the buffered average bitrate.
*
@ -3410,7 +3415,7 @@ static void update_buffer_level(VP8_COMP *cpi)
*/
if(cpi->total_actual_bits > cpi->oxcf.starting_buffer_level)
{
tmp = (long long)cpi->buffered_av_per_frame_bandwidth
tmp = (int64_t)cpi->buffered_av_per_frame_bandwidth
* cpi->oxcf.maximum_buffer_size
/ cpi->av_per_frame_bandwidth;
cpi->buffer_level = cpi->oxcf.maximum_buffer_size
@ -3428,7 +3433,7 @@ static void update_buffer_level(VP8_COMP *cpi)
*/
if(cpi->total_actual_bits > cpi->oxcf.starting_buffer_level)
{
long long decayed_overshoot;
int64_t decayed_overshoot;
decayed_overshoot = cpi->accumulated_overshoot;
decayed_overshoot *= (cpi->oxcf.maximum_buffer_size
@ -4796,22 +4801,22 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
{
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
*cpi->oxcf.two_pass_vbrmin_section / 100);
cpi->twopass.bits_left += (long long)(two_pass_min_rate / cpi->oxcf.frame_rate);
cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->oxcf.frame_rate);
}
}
#endif
//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
#if HAVE_ARMV7
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
extern void vp8_push_neon(int64_t *store);
extern void vp8_pop_neon(int64_t *store);
#endif
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time)
{
#if HAVE_ARMV7
INT64 store_reg[8];
int64_t store_reg[8];
#endif
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
@ -4862,10 +4867,10 @@ static int frame_is_reference(const VP8_COMP *cpi)
}
int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, int64_t *time_stamp, int64_t *time_end, int flush)
{
#if HAVE_ARMV7
INT64 store_reg[8];
int64_t store_reg[8];
#endif
VP8_COMP *cpi = (VP8_COMP *) ptr;
VP8_COMMON *cm = &cpi->common;
@ -4972,7 +4977,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
// adjust frame rates based on timestamps given
if (!cm->refresh_alt_ref_frame)
{
long long this_duration;
int64_t this_duration;
int step = 0;
if (cpi->source->ts_start == cpi->first_time_stamp_ever)
@ -4982,7 +4987,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
}
else
{
long long last_duration;
int64_t last_duration;
this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen;
last_duration = cpi->last_end_time_stamp_seen
@ -5158,7 +5163,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
int y_samples = orig->y_height * orig->y_width ;
int uv_samples = orig->uv_height * orig->uv_width ;
int t_samples = y_samples + 2 * uv_samples;
long long sq_error;
int64_t sq_error;
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height,

View File

@ -321,10 +321,10 @@ typedef struct VP8_COMP
CODING_CONTEXT coding_context;
// Rate targetting variables
long long prediction_error;
long long last_prediction_error;
long long intra_error;
long long last_intra_error;
int64_t prediction_error;
int64_t last_prediction_error;
int64_t intra_error;
int64_t last_intra_error;
int this_frame_target;
int projected_frame_size;
@ -347,7 +347,7 @@ typedef struct VP8_COMP
int baseline_gf_interval;
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
INT64 key_frame_count;
int64_t key_frame_count;
int prior_key_frame_distance[KEY_FRAME_CONTEXT];
int per_frame_bandwidth; // Current section per frame bandwidth target
int av_per_frame_bandwidth; // Average frame size target for clip
@ -358,9 +358,9 @@ typedef struct VP8_COMP
int inter_frame_target;
double output_frame_rate;
long long last_time_stamp_seen;
long long last_end_time_stamp_seen;
long long first_time_stamp_ever;
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
int64_t first_time_stamp_ever;
int ni_av_qi;
int ni_tot_qi;
@ -371,7 +371,7 @@ typedef struct VP8_COMP
int zbin_mode_boost;
int zbin_mode_boost_enabled;
INT64 total_byte_count;
int64_t total_byte_count;
int buffered_mode;
@ -384,7 +384,7 @@ typedef struct VP8_COMP
int long_rolling_target_bits;
int long_rolling_actual_bits;
long long total_actual_bits;
int64_t total_actual_bits;
int total_target_vs_actual; // debug stats
int worst_quality;
@ -541,8 +541,8 @@ typedef struct VP8_COMP
FIRSTPASS_STATS *this_frame_stats;
FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
int first_pass_done;
long long bits_left;
long long clip_bits_total;
int64_t bits_left;
int64_t clip_bits_total;
double avg_iiratio;
double modified_error_total;
double modified_error_used;
@ -562,10 +562,10 @@ typedef struct VP8_COMP
int gf_group_error_left; // Remaining error from uncoded frames in a gf group. Two pass use only
// Projected total bits available for a key frame group of frames
long long kf_group_bits;
int64_t kf_group_bits;
// Error score of frames still to be coded in kf group
long long kf_group_error_left;
int64_t kf_group_error_left;
int gf_group_bits; // Projected Bits available for a group of frames including 1 GF or ARF
int gf_bits; // Bits for the golden frame or ARF - 2 pass only

View File

@ -669,8 +669,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
mvp_full.as_mv.col = mvp.as_mv.col>>3;
mvp_full.as_mv.row = mvp.as_mv.row>>3;
/* adjust mvp to make sure it is within MV range */
vp8_clamp_mv(&mvp_full, col_min, col_max, row_min, row_max);
}else
{
mvp.as_int = best_ref_mv.as_int;

View File

@ -161,7 +161,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
if (cm->sharpness_level != cm->last_sharpness_level)
{
vp8_loop_filter_update_sharpness(&cm->lf_info, cm->sharpness_level);
cm->last_sharpness_level = cm->last_sharpness_level;
cm->last_sharpness_level = cm->sharpness_level;
}
// Start the search at the previous frame filter level unless it is now out of range.

View File

@ -897,9 +897,9 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if ( critical_buffer_level >
(cpi->oxcf.optimal_buffer_level >> 2) )
{
INT64 qadjustment_range =
int64_t qadjustment_range =
cpi->worst_quality - cpi->ni_av_qi;
INT64 above_base =
int64_t above_base =
(critical_buffer_level -
(cpi->oxcf.optimal_buffer_level >> 2));
@ -1163,7 +1163,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (cpi->pass==0
&& cpi->common.refresh_golden_frame
&& cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
long long adjust;
int64_t adjust;
/*
frames_in_buffer = cpi->oxcf.maximum_buffer_size

View File

@ -728,7 +728,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int distortion = 0;
int tot_rate_y = 0;
long long total_rd = 0;
int64_t total_rd = 0;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
@ -770,11 +770,11 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
mic->bmi[i].as_mode = best_mode;
if(total_rd >= (long long)best_rd)
if(total_rd >= (int64_t)best_rd)
break;
}
if(total_rd >= (long long)best_rd)
if(total_rd >= (int64_t)best_rd)
return INT_MAX;
*Rate = cost;
@ -1244,6 +1244,9 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
{
/* Check if mvp_full is within the range. */
vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, c, e, &mvp_full,
sadpb, 16, v_fn_ptr,
x->mvcost, bsi->ref_mv);
@ -2079,9 +2082,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
mvp_full.as_mv.col = mvp.as_mv.col>>3;
mvp_full.as_mv.row = mvp.as_mv.row>>3;
/* adjust mvp to make sure it is within MV range */
vp8_clamp_mv(&mvp_full, col_min, col_max, row_min, row_max);
// Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
if (x->mv_col_min < col_min )
x->mv_col_min = col_min;
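
As in the motion-search hunks earlier, the vector used to seed a search (mvp_full here, ref_mv in the hex and diamond searches) is now clamped into the column/row limits derived from the UMV border before it is used, which, judging by the comments, keeps the seeded search from probing outside the legal MV range. vp8_clamp_mv itself is not shown in this diff; the include of vp8/common/findnearmv.h added in the motion-search file suggests that is where it is declared. The helper below is a hypothetical equivalent for illustration only:

```c
/* Hypothetical stand-in for vp8_clamp_mv(); not the libvpx definition. */
typedef union {
    unsigned int as_int;
    struct { short row, col; } as_mv;
} int_mv;

static void clamp_mv(int_mv *mv, int col_min, int col_max,
                     int row_min, int row_max)
{
    if (mv->as_mv.col < col_min)      mv->as_mv.col = col_min;
    else if (mv->as_mv.col > col_max) mv->as_mv.col = col_max;

    if (mv->as_mv.row < row_min)      mv->as_mv.row = row_min;
    else if (mv->as_mv.row > row_max) mv->as_mv.row = row_max;
}
```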

View File

@ -73,8 +73,8 @@ void ssim_parms_8x8_c
}
}
const static long long cc1 = 26634; // (64^2*(.01*255)^2
const static long long cc2 = 239708; // (64^2*(.03*255)^2
const static int64_t cc1 = 26634; // (64^2*(.01*255)^2
const static int64_t cc2 = 239708; // (64^2*(.03*255)^2
static double similarity
(
@ -86,19 +86,19 @@ static double similarity
int count
)
{
long long ssim_n, ssim_d;
long long c1, c2;
int64_t ssim_n, ssim_d;
int64_t c1, c2;
//scale the constants by number of pixels
c1 = (cc1*count*count)>>12;
c2 = (cc2*count*count)>>12;
ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
(long long) 2*sum_s*sum_r+c2);
ssim_n = (2*sum_s*sum_r+ c1)*((int64_t) 2*count*sum_sxr-
(int64_t) 2*sum_s*sum_r+c2);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
((long long)count*sum_sq_s-(long long)sum_s*sum_s +
(long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
((int64_t)count*sum_sq_s-(int64_t)sum_s*sum_s +
(int64_t)count*sum_sq_r-(int64_t) sum_r*sum_r +c2) ;
return ssim_n * 1.0 / ssim_d;
}
@ -124,11 +124,11 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
long long ssim3;
long long ssim_n1,ssim_n2;
long long ssim_d1,ssim_d2;
long long ssim_t1,ssim_t2;
long long c1, c2;
int64_t ssim3;
int64_t ssim_n1,ssim_n2;
int64_t ssim_d1,ssim_d2;
int64_t ssim_t1,ssim_t2;
int64_t c1, c2;
// normalize by 256/64
c1 = cc1*16;
@ -137,12 +137,12 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
ssim_n1 = (2*sum_s*sum_r+ c1);
ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);
ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
ssim_d1 =((int64_t)sum_s*sum_s +(int64_t)sum_r*sum_r+c1);
ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
(long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
ssim_d2 = (256 * (int64_t) sum_sq_s-(int64_t) sum_s*sum_s +
(int64_t) 256*sum_sq_r-(int64_t) sum_r*sum_r +c2) ;
ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
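
For reference, similarity() above is the standard SSIM expression evaluated on raw pixel sums over a window of count pixels; the cc1 and cc2 constants carry the extra 64^2 factor noted in their comments, which the (count*count) >> 12 scaling cancels:

```latex
\mathrm{SSIM}(s,r) =
  \frac{(2\mu_s\mu_r + C_1)\,(2\sigma_{sr} + C_2)}
       {(\mu_s^2 + \mu_r^2 + C_1)\,(\sigma_s^2 + \sigma_r^2 + C_2)},
\qquad C_1 = (0.01\cdot255)^2,\quad C_2 = (0.03\cdot255)^2
```

Working from sums rather than means multiplies numerator and denominator by the same power of count, so the ratio is unchanged; the long long to int64_t switch in this hunk only moves to the portable typedef.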

View File

@ -24,7 +24,6 @@
#include "segmentation.h"
#include "vp8/common/g_common.h"
#include "vpx_scale/yv12extend.h"
#include "vp8/common/postproc.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"

View File

@ -15,6 +15,7 @@ VP8_COMMON_SRCS-yes += common/ppflags.h
VP8_COMMON_SRCS-yes += common/onyx.h
VP8_COMMON_SRCS-yes += common/onyxd.h
VP8_COMMON_SRCS-yes += common/alloccommon.c
VP8_COMMON_SRCS-yes += common/asm_com_offsets.c
VP8_COMMON_SRCS-yes += common/blockd.c
VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
VP8_COMMON_SRCS-yes += common/debugmodes.c
@ -101,19 +102,21 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif
# common (c)
VP8_COMMON_SRCS-$(ARCH_ARM) += common/asm_com_offsets.c
ifeq ($(CONFIG_CSM),yes)
VP8_COMMON_SRCS-yes += common/maskingmv.c
VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/mask_sse3.asm
endif
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/idct_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/recon_arm.h
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/subpixel_arm.h
# common (armv6)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)

View File

@ -40,6 +40,7 @@ struct vp8_extracfg
unsigned int experimental;
vp8e_tuning tuning;
unsigned int cq_level; /* constrained quality level */
unsigned int rc_max_intra_bitrate_pct;
};
@ -73,6 +74,7 @@ static const struct extraconfig_map extracfg_map[] =
0, /* experimental mode */
0, /* tuning*/
10, /* cq_level */
0, /* rc_max_intra_bitrate_pct */
}
}
};
@ -308,7 +310,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
}
oxcf->target_bandwidth = cfg.rc_target_bitrate;
oxcf->rc_max_intra_bitrate_pct = cfg.rc_max_intra_bitrate_pct;
oxcf->rc_max_intra_bitrate_pct = vp8_cfg.rc_max_intra_bitrate_pct;
oxcf->best_allowed_q = cfg.rc_min_quantizer;
oxcf->worst_allowed_q = cfg.rc_max_quantizer;
@ -465,6 +467,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type);
MAP(VP8E_SET_TUNING, xcfg.tuning);
MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level);
MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct);
}
@ -745,7 +748,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
{
unsigned int lib_flags;
YV12_BUFFER_CONFIG sd;
INT64 dst_time_stamp, dst_end_time_stamp;
int64_t dst_time_stamp, dst_end_time_stamp;
unsigned long size, cx_data_sz;
unsigned char *cx_data;
@ -1107,6 +1110,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] =
{VP8E_SET_ARNR_TYPE , set_param},
{VP8E_SET_TUNING, set_param},
{VP8E_SET_CQ_LEVEL, set_param},
{VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param},
{ -1, NULL},
};
@ -1139,7 +1143,6 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
{0}, /* rc_twopass_stats_in */
#endif
256, /* rc_target_bandwidth */
0, /* rc_max_intra_bitrate_pct */
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
100, /* rc_undershoot_pct */

View File

@ -427,7 +427,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
if (!res && ctx->pbi)
{
YV12_BUFFER_CONFIG sd;
INT64 time_stamp = 0, time_end_stamp = 0;
int64_t time_stamp = 0, time_end_stamp = 0;
vp8_ppflags_t flags = {0};
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)

View File

@ -15,9 +15,12 @@
# encoder
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/picklpf_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.h
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/encodemb_arm.h
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/picklpf_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/quantize_arm.h
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/variance_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/variance_arm.h

View File

@ -48,6 +48,7 @@ VP8_DX_SRCS-yes += vp8_dx_iface.c
#INCLUDES += common
#INCLUDES += decoder
VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c
VP8_DX_SRCS-yes += decoder/dboolhuff.c
VP8_DX_SRCS-yes += decoder/decodemv.c
VP8_DX_SRCS-yes += decoder/decodframe.c

View File

@ -12,9 +12,8 @@
#VP8_DX_SRCS list is modified according to different platforms.
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
VP8_CX_SRCS-$(ARCH_ARM) += decoder/asm_dec_offsets.c
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.c
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.h
#File list for armv6
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)

View File

@ -174,6 +174,20 @@ enum vp8e_enc_control_id
* \note Valid range: 0..63
*/
VP8E_SET_CQ_LEVEL,
/*!\brief Max data rate for Intra frames
*
* This value controls additional clamping on the maximum size of a
* keyframe. It is expressed as a percentage of the average
* per-frame bitrate, with the special (and default) value 0 meaning
* unlimited, or no additional clamping beyond the codec's built-in
* algorithm.
*
* For example, to allocate no more than 4.5 frames worth of bitrate
* to a keyframe, set this to 450.
*
*/
VP8E_SET_MAX_INTRA_BITRATE_PCT,
};
/*!\brief vpx 1-D scaling mode
@ -305,6 +319,9 @@ VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
/*! @} - end defgroup vp8_encoder */
#include "vpx_codec_impl_bottom.h"
#endif
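
Taken together with the vp8_cx_iface.c hunks above (new vp8_extracfg field, MAP() entry and vp8e_ctf_maps entry) and the vpx_encoder.h hunk below that drops rc_max_intra_bitrate_pct from the generic config, the keyframe cap becomes a VP8-specific codec control rather than an rc_ field. A minimal usage sketch, assuming an already-initialised encoder context (the helper name cap_keyframe_size is made up for illustration):

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Sketch only: `encoder` is assumed to be a vpx_codec_ctx_t already
     * initialised with vpx_codec_enc_init(). Cap keyframes at 4.5x the
     * average per-frame bitrate (450%), per the control's documentation. */
    static vpx_codec_err_t cap_keyframe_size(vpx_codec_ctx_t *encoder)
    {
        return vpx_codec_control(encoder, VP8E_SET_MAX_INTRA_BITRATE_PCT, 450u);
    }

vpxenc exposes the same knob as --max-intra-rate, which the vpxenc.c hunks further down move from the generic rate-control argument list into the VP8-specific list and wire to VP8E_SET_MAX_INTRA_BITRATE_PCT via vp8_arg_ctrl_map.
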

View File

@ -433,21 +433,6 @@ extern "C" {
unsigned int rc_target_bitrate;
/*!\brief Max data rate for Intra frames
*
* This value controls additional clamping on the maximum size of a
* keyframe. It is expressed as a percentage of the average
* per-frame bitrate, with the special (and default) value 0 meaning
* unlimited, or no additional clamping beyond the codec's built-in
* algorithm.
*
* For example, to allocate no more than 4.5 frames worth of bitrate
* to a keyframe, set this to 450.
*
*/
unsigned int rc_max_intra_bitrate_pct;
/*
* quantizer settings
*/

View File

@ -18,42 +18,32 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
;Note: this is VP8 function, which has border=32 and 16. Internal y_width and y_height
; are always multiples of 16.
; we depend on VP8BORDERINPIXELS being 32
|vp8_yv12_extend_frame_borders_neon| PROC
push {r4 - r10, lr}
vpush {d8 - d15}
;Not need to load y_width, since: y_width = y_stride - 2*border
ldr r3, [r0, #yv12_buffer_config_border]
ldr r1, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
ldr r4, [r0, #yv12_buffer_config_y_height]
ldr lr, [r0, #yv12_buffer_config_y_stride]
; Border = 32
ldr r3, [r0, #yv12_buffer_config_y_width] ; plane_width
ldr r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1
ldr r4, [r0, #yv12_buffer_config_y_height] ; plane_height
ldr lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride
cmp r3, #16
beq b16_extend_frame_borders
; Border copy for Y plane
; copy the left and right most columns out
add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1
sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border
;=======================
b32_extend_frame_borders
;border = 32
;=======================
;Border copy for Y plane
;copy the left and right most columns out
sub r5, r1, r3 ;destptr1
add r6, r1, lr
sub r6, r6, r3, lsl #1 ;destptr2
sub r2, r6, #1 ;srcptr2
;Do four rows at one time
mov r12, r4, lsr #2
mov r12, r4, lsr #2 ; plane_height / 4
copy_left_right_y
vld1.8 {d0[], d1[]}, [r1], lr
vld1.8 {d4[], d5[]}, [r2], lr
vld1.8 {d8[], d9[]}, [r1], lr
vld1.8 {d12[], d13[]}, [r2], lr
vld1.8 {d16[], d17[]}, [r1], lr
vld1.8 {d16[], d17[]}, [r1], lr
vld1.8 {d20[], d21[]}, [r2], lr
vld1.8 {d24[], d25[]}, [r1], lr
vld1.8 {d28[], d29[]}, [r2], lr
@ -81,15 +71,16 @@ copy_left_right_y
bne copy_left_right_y
;Now copy the top and bottom source lines into each line of the respective borders
ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
mul r8, r3, lr
ldr r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer
mul r8, r4, lr ; plane_height * plane_stride
mov r12, lr, lsr #7
; copy width is plane_stride
mov r12, lr, lsr #7 ; plane_stride / 128
sub r6, r1, r3 ;destptr2
sub r2, r6, lr ;srcptr2
sub r1, r7, r3 ;srcptr1
sub r5, r1, r8 ;destptr1
sub r1, r1, #32 ; src_ptr1 = y_buffer - Border
add r6, r1, r8 ; dest_ptr2 = src_ptr2 - plane_stride (src_ptr1 + (plane_height * plane_stride))
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
copy_top_bottom_y
vld1.8 {q0, q1}, [r1]!
@ -101,7 +92,7 @@ copy_top_bottom_y
vld1.8 {q6, q7}, [r1]!
vld1.8 {q14, q15}, [r2]!
mov r7, r3
mov r7, #32 ; Border
top_bottom_32
subs r7, r7, #1
@ -115,44 +106,41 @@ top_bottom_32
vst1.8 {q6, q7}, [r5]!
vst1.8 {q14, q15}, [r6]!
add r5, r5, lr
sub r5, r5, #128
add r6, r6, lr
sub r6, r6, #128
add r5, r5, lr ; dest_ptr1 += plane_stride
sub r5, r5, #128 ; dest_ptr1 -= 128
add r6, r6, lr ; dest_ptr2 += plane_stride
sub r6, r6, #128 ; dest_ptr2 -= 128
bne top_bottom_32
sub r5, r1, r8
add r6, r2, lr
sub r5, r1, lr, asl #5 ; src_ptr1 - (Border* plane_stride)
add r6, r2, lr ; src_ptr2 + plane_stride
subs r12, r12, #1
bne copy_top_bottom_y
mov r7, lr, lsr #4 ;check to see if extra copy is needed
mov r7, lr, lsr #4 ; check to see if extra copy is needed
ands r7, r7, #0x7
bne extra_top_bottom_y
end_of_border_copy_y
;Border copy for U, V planes
ldr r1, [r0, #yv12_buffer_config_u_buffer] ;srcptr1
mov lr, lr, lsr #1 ;uv_stride
mov r3, r3, lsr #1 ;border
mov r4, r4, lsr #1 ;uv_height
mov r8, r8, lsr #2
; Border = 16
ldr r7, [r0, #yv12_buffer_config_u_buffer] ; src_ptr1
ldr lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride
ldr r3, [r0, #yv12_buffer_config_uv_width] ; plane_width
ldr r4, [r0, #yv12_buffer_config_uv_height] ; plane_height
mov r10, #2
;copy the left and right most columns out
border_copy_uv
sub r5, r1, r3 ;destptr1
add r6, r1, lr
sub r6, r6, r3, lsl #1 ;destptr2
sub r2, r6, #1 ;srcptr2
mov r1, r7 ; src_ptr1 needs to be saved for second half of loop
sub r5, r1, #16 ; dest_ptr1 = src_ptr1 - Border
add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1
mov r7, r1
;Do eight rows at one time
mov r12, r4, lsr #3
mov r12, r4, lsr #3 ; plane_height / 8
copy_left_right_uv
vld1.8 {d0[], d1[]}, [r1], lr
@ -167,7 +155,7 @@ copy_left_right_uv
vld1.8 {d18[], d19[]}, [r2], lr
vld1.8 {d20[], d21[]}, [r1], lr
vld1.8 {d22[], d23[]}, [r2], lr
vld1.8 {d24[], d25[]}, [r1], lr
vld1.8 {d24[], d25[]}, [r1], lr
vld1.8 {d26[], d27[]}, [r2], lr
vld1.8 {d28[], d29[]}, [r1], lr
vld1.8 {d30[], d31[]}, [r2], lr
@ -194,12 +182,14 @@ copy_left_right_uv
bne copy_left_right_uv
;Now copy the top and bottom source lines into each line of the respective borders
mov r12, lr, lsr #6
mov r1, r7
mul r8, r4, lr ; plane_height * plane_stride
mov r12, lr, lsr #6 ; plane_stride / 64
sub r6, r1, r3 ;destptr2
sub r2, r6, lr ;srcptr2
sub r1, r7, r3 ;srcptr1
sub r5, r1, r8 ;destptr1
sub r1, r1, #16 ; src_ptr1 = u_buffer - Border
add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)
sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
copy_top_bottom_uv
vld1.8 {q0, q1}, [r1]!
@ -207,7 +197,7 @@ copy_top_bottom_uv
vld1.8 {q2, q3}, [r1]!
vld1.8 {q10, q11}, [r2]!
mov r7, r3
mov r7, #16 ; Border
top_bottom_16
subs r7, r7, #1
@ -217,38 +207,37 @@ top_bottom_16
vst1.8 {q2, q3}, [r5]!
vst1.8 {q10, q11}, [r6]!
add r5, r5, lr
add r5, r5, lr ; dest_ptr1 += plane_stride
sub r5, r5, #64
add r6, r6, lr
add r6, r6, lr ; dest_ptr2 += plane_stride
sub r6, r6, #64
bne top_bottom_16
sub r5, r1, r8
add r6, r2, lr
sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_stride
subs r12, r12, #1
bne copy_top_bottom_uv
mov r7, lr, lsr #3 ;check to see if extra copy is needed
mov r7, lr, lsr #3 ; check to see if extra copy is needed
ands r7, r7, #0x7
bne extra_top_bottom_uv
end_of_border_copy_uv
subs r10, r10, #1
ldrne r1, [r0, #yv12_buffer_config_v_buffer] ;srcptr1
ldrne r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1
bne border_copy_uv
vpop {d8 - d15}
pop {r4 - r10, pc}
;;;;;;;;;;;;;;;;;;;;;;
;extra copy part for Y
extra_top_bottom_y
vld1.8 {q0}, [r1]!
vld1.8 {q2}, [r2]!
mov r9, r3, lsr #3
mov r9, #4 ; 32 >> 3
extra_top_bottom_32
subs r9, r9, #1
@ -271,19 +260,18 @@ extra_top_bottom_32
vst1.8 {q2}, [r6], lr
bne extra_top_bottom_32
sub r5, r1, r8
add r6, r2, lr
sub r5, r1, lr, asl #5 ; src_ptr1 - (Border * plane_stride)
add r6, r2, lr ; src_ptr2 + plane_stride
subs r7, r7, #1
bne extra_top_bottom_y
b end_of_border_copy_y
;extra copy part for UV
extra_top_bottom_uv
vld1.8 {d0}, [r1]!
vld1.8 {d8}, [r2]!
mov r9, r3, lsr #3
mov r9, #2 ; 16 >> 3
extra_top_bottom_16
subs r9, r9, #1
@ -306,283 +294,12 @@ extra_top_bottom_16
vst1.8 {d8}, [r6], lr
bne extra_top_bottom_16
sub r5, r1, r8
add r6, r2, lr
sub r5, r1, lr, asl #4 ; src_ptr1 - (Border * plane_stride)
add r6, r2, lr ; src_ptr2 + plane_stride
subs r7, r7, #1
bne extra_top_bottom_uv
b end_of_border_copy_uv
;=======================
b16_extend_frame_borders
;border = 16
;=======================
;Border copy for Y plane
;copy the left and right most columns out
sub r5, r1, r3 ;destptr1
add r6, r1, lr
sub r6, r6, r3, lsl #1 ;destptr2
sub r2, r6, #1 ;srcptr2
;Do four rows at one time
mov r12, r4, lsr #2
copy_left_right_y_b16
vld1.8 {d0[], d1[]}, [r1], lr
vld1.8 {d4[], d5[]}, [r2], lr
vld1.8 {d8[], d9[]}, [r1], lr
vld1.8 {d12[], d13[]}, [r2], lr
vld1.8 {d16[], d17[]}, [r1], lr
vld1.8 {d20[], d21[]}, [r2], lr
vld1.8 {d24[], d25[]}, [r1], lr
vld1.8 {d28[], d29[]}, [r2], lr
subs r12, r12, #1
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q4}, [r5], lr
vst1.8 {q6}, [r6], lr
vst1.8 {q8}, [r5], lr
vst1.8 {q10}, [r6], lr
vst1.8 {q12}, [r5], lr
vst1.8 {q14}, [r6], lr
bne copy_left_right_y_b16
;Now copy the top and bottom source lines into each line of the respective borders
ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1
mul r8, r3, lr
mov r12, lr, lsr #7
sub r6, r1, r3 ;destptr2
sub r2, r6, lr ;srcptr2
sub r1, r7, r3 ;srcptr1
sub r5, r1, r8 ;destptr1
copy_top_bottom_y_b16
vld1.8 {q0, q1}, [r1]!
vld1.8 {q8, q9}, [r2]!
vld1.8 {q2, q3}, [r1]!
vld1.8 {q10, q11}, [r2]!
vld1.8 {q4, q5}, [r1]!
vld1.8 {q12, q13}, [r2]!
vld1.8 {q6, q7}, [r1]!
vld1.8 {q14, q15}, [r2]!
mov r7, r3
top_bottom_16_b16
subs r7, r7, #1
vst1.8 {q0, q1}, [r5]!
vst1.8 {q8, q9}, [r6]!
vst1.8 {q2, q3}, [r5]!
vst1.8 {q10, q11}, [r6]!
vst1.8 {q4, q5}, [r5]!
vst1.8 {q12, q13}, [r6]!
vst1.8 {q6, q7}, [r5]!
vst1.8 {q14, q15}, [r6]!
add r5, r5, lr
sub r5, r5, #128
add r6, r6, lr
sub r6, r6, #128
bne top_bottom_16_b16
sub r5, r1, r8
add r6, r2, lr
subs r12, r12, #1
bne copy_top_bottom_y_b16
mov r7, lr, lsr #4 ;check to see if extra copy is needed
ands r7, r7, #0x7
bne extra_top_bottom_y_b16
end_of_border_copy_y_b16
;Border copy for U, V planes
ldr r1, [r0, #yv12_buffer_config_u_buffer] ;srcptr1
mov lr, lr, lsr #1 ;uv_stride
mov r3, r3, lsr #1 ;border
mov r4, r4, lsr #1 ;uv_height
mov r8, r8, lsr #2
mov r10, #2
;copy the left and right most columns out
border_copy_uv_b16
sub r5, r1, r3 ;destptr1
add r6, r1, lr
sub r6, r6, r3, lsl #1 ;destptr2
sub r2, r6, #1 ;srcptr2
mov r7, r1
;Do eight rows at one time
mov r12, r4, lsr #3
copy_left_right_uv_b16
vld1.8 {d0[]}, [r1], lr
vld1.8 {d2[]}, [r2], lr
vld1.8 {d4[]}, [r1], lr
vld1.8 {d6[]}, [r2], lr
vld1.8 {d8[]}, [r1], lr
vld1.8 {d10[]}, [r2], lr
vld1.8 {d12[]}, [r1], lr
vld1.8 {d14[]}, [r2], lr
vld1.8 {d16[]}, [r1], lr
vld1.8 {d18[]}, [r2], lr
vld1.8 {d20[]}, [r1], lr
vld1.8 {d22[]}, [r2], lr
vld1.8 {d24[]}, [r1], lr
vld1.8 {d26[]}, [r2], lr
vld1.8 {d28[]}, [r1], lr
vld1.8 {d30[]}, [r2], lr
subs r12, r12, #1
vst1.8 {d0}, [r5], lr
vst1.8 {d2}, [r6], lr
vst1.8 {d4}, [r5], lr
vst1.8 {d6}, [r6], lr
vst1.8 {d8}, [r5], lr
vst1.8 {d10}, [r6], lr
vst1.8 {d12}, [r5], lr
vst1.8 {d14}, [r6], lr
vst1.8 {d16}, [r5], lr
vst1.8 {d18}, [r6], lr
vst1.8 {d20}, [r5], lr
vst1.8 {d22}, [r6], lr
vst1.8 {d24}, [r5], lr
vst1.8 {d26}, [r6], lr
vst1.8 {d28}, [r5], lr
vst1.8 {d30}, [r6], lr
bne copy_left_right_uv_b16
;Now copy the top and bottom source lines into each line of the respective borders
mov r12, lr, lsr #6
sub r6, r1, r3 ;destptr2
sub r2, r6, lr ;srcptr2
sub r1, r7, r3 ;srcptr1
sub r5, r1, r8 ;destptr1
copy_top_bottom_uv_b16
vld1.8 {q0, q1}, [r1]!
vld1.8 {q8, q9}, [r2]!
vld1.8 {q2, q3}, [r1]!
vld1.8 {q10, q11}, [r2]!
mov r7, r3
top_bottom_8_b16
subs r7, r7, #1
vst1.8 {q0, q1}, [r5]!
vst1.8 {q8, q9}, [r6]!
vst1.8 {q2, q3}, [r5]!
vst1.8 {q10, q11}, [r6]!
add r5, r5, lr
sub r5, r5, #64
add r6, r6, lr
sub r6, r6, #64
bne top_bottom_8_b16
sub r5, r1, r8
add r6, r2, lr
subs r12, r12, #1
bne copy_top_bottom_uv_b16
mov r7, lr, lsr #3 ;check to see if extra copy is needed
ands r7, r7, #0x7
bne extra_top_bottom_uv_b16
end_of_border_copy_uv_b16
subs r10, r10, #1
ldrne r1, [r0, #yv12_buffer_config_v_buffer] ;srcptr1
bne border_copy_uv_b16
vpop {d8-d15}
pop {r4 - r10, pc}
;;;;;;;;;;;;;;;;;;;;;;
;extra copy part for Y
extra_top_bottom_y_b16
vld1.8 {q0}, [r1]!
vld1.8 {q2}, [r2]!
mov r9, r3, lsr #3
extra_top_bottom_16_b16
subs r9, r9, #1
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
vst1.8 {q0}, [r5], lr
vst1.8 {q2}, [r6], lr
bne extra_top_bottom_16_b16
sub r5, r1, r8
add r6, r2, lr
subs r7, r7, #1
bne extra_top_bottom_y_b16
b end_of_border_copy_y_b16
;extra copy part for UV
extra_top_bottom_uv_b16
vld1.8 {d0}, [r1]!
vld1.8 {d8}, [r2]!
mov r9, r3, lsr #3
extra_top_bottom_8_b16
subs r9, r9, #1
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
vst1.8 {d0}, [r5], lr
vst1.8 {d8}, [r6], lr
bne extra_top_bottom_8_b16
sub r5, r1, r8
add r6, r2, lr
subs r7, r7, #1
bne extra_top_bottom_uv_b16
b end_of_border_copy_uv_b16
ENDP
END
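
For readers not fluent in NEON, a plain-C sketch of what the rewritten routine does. The function name extend_plane_borders and the single-plane signature are illustrative only; the real code walks the Y, U and V planes of a YV12_BUFFER_CONFIG with the fixed borders the new comments mention (32 pixels for Y, 16 for U/V):

    #include <string.h>

    /* Replicate the edge pixels of a plane into its surrounding border. */
    static void extend_plane_borders(unsigned char *plane, int stride,
                                     int width, int height, int border)
    {
        int i;

        /* Copy the left-most and right-most pixel of every row into the
         * side borders. */
        for (i = 0; i < height; i++)
        {
            unsigned char *row = plane + i * stride;
            memset(row - border, row[0], border);
            memset(row + width, row[width - 1], border);
        }

        /* Copy the (now widened) top and bottom rows into the rows above
         * and below the image. */
        for (i = 1; i <= border; i++)
        {
            unsigned char *top = plane - border;
            unsigned char *bottom = plane + (height - 1) * stride - border;
            memcpy(top - i * stride, top, width + 2 * border);
            memcpy(bottom + i * stride, bottom, width + 2 * border);
        }
    }

The assembly appears to do the same work in 128-byte (Y) and 64-byte (U/V) chunks across the full plane_stride, with the extra_top_bottom_* paths handling strides that are not a multiple of the chunk size.
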

View File

@ -542,7 +542,7 @@ void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, un
}
#undef WRITE_BUFFER
/* Need a fixed size serializer for the track ID. libmkv provdes a 64 bit
/* Need a fixed size serializer for the track ID. libmkv provides a 64 bit
* one, but not a 32 bit one.
*/
static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
@ -559,8 +559,8 @@ Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
unsigned long class_id)
{
//todo this is always taking 8 bytes, this may need later optimization
//this is a key that says lenght unknown
unsigned long long unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF);
//this is a key that says length unknown
uint64_t unknownLen = LITERALU64(0x01FFFFFFFFFFFFFF);
Ebml_WriteID(glob, class_id);
*ebmlLoc = ftello(glob->stream);
@ -975,7 +975,7 @@ static const struct arg_enum_list stereo_mode_enum[] = {
static const arg_def_t stereo_mode = ARG_DEF_ENUM(NULL, "stereo-mode", 1,
"Stereo 3D video format", stereo_mode_enum);
static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1,
"Stream timebase (frame duration)");
"Output timestamp precision (fractional seconds)");
static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1,
"Enable error resiliency features");
static const arg_def_t lag_in_frames = ARG_DEF(NULL, "lag-in-frames", 1,
@ -1020,14 +1020,11 @@ static const arg_def_t buf_initial_sz = ARG_DEF(NULL, "buf-initial-sz", 1,
"Client initial buffer size (ms)");
static const arg_def_t buf_optimal_sz = ARG_DEF(NULL, "buf-optimal-sz", 1,
"Client optimal buffer size (ms)");
static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
"Max I-frame bitrate (pct)");
static const arg_def_t *rc_args[] =
{
&dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh,
&end_usage, &target_bitrate, &min_quantizer, &max_quantizer,
&undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz,
&max_intra_rate_pct,
NULL
};
@ -1091,12 +1088,14 @@ static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
"Material to favor", tuning_enum);
static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1,
"Constrained Quality Level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
"Max I-frame bitrate (pct)");
static const arg_def_t *vp8_args[] =
{
&cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
&tune_ssim, &cq_level, NULL
&tune_ssim, &cq_level, &max_intra_rate_pct, NULL
};
static const int vp8_arg_ctrl_map[] =
{
@ -1104,7 +1103,7 @@ static const int vp8_arg_ctrl_map[] =
VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
VP8E_SET_TOKEN_PARTITIONS,
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE,
VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, 0
VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, 0
};
#endif
@ -1131,6 +1130,9 @@ static void usage_exit()
fprintf(stderr, "\nVP8 Specific Options:\n");
arg_show_usage(stdout, vp8_args);
#endif
fprintf(stderr, "\nStream timebase (--timebase):\n"
" The desired precision of timestamps in the output, expressed\n"
" in fractional seconds. Default is 1/1000.\n");
fprintf(stderr, "\n"
"Included encoders:\n"
"\n");
@ -1169,7 +1171,7 @@ static int merge_hist_buckets(struct hist_bucket *bucket,
big_bucket = i;
}
/* If we have too many buckets, merge the smallest with an ajacent
/* If we have too many buckets, merge the smallest with an adjacent
* bucket.
*/
while(buckets > max_buckets)
@ -1640,8 +1642,6 @@ int main(int argc, const char **argv_)
cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
else if (arg_match(&arg, &target_bitrate, argi))
cfg.rc_target_bitrate = arg_parse_uint(&arg);
else if (arg_match(&arg, &max_intra_rate_pct, argi))
cfg.rc_max_intra_bitrate_pct = arg_parse_uint(&arg);
else if (arg_match(&arg, &min_quantizer, argi))
cfg.rc_min_quantizer = arg_parse_uint(&arg);
else if (arg_match(&arg, &max_quantizer, argi))