From 6eec73a747f3d14d327cdc33279de96d0deb48c8 Mon Sep 17 00:00:00 2001 From: Johann Date: Thu, 31 Jul 2014 14:19:31 -0700 Subject: [PATCH] Remove asm offset dependencies The obj_int_extract code is no longer worth maintaining. It creates significant issues when adapting for different build systems and no longer offers as significant of a performance benefit due to improvements in intrinsics. Source files will remain until the various third-party builds are updated. The neon fast quantizer has been moved to intrinsics. The armv6 version has been removed because so few remaining targets require it. Compilers and processors have improved significantly since the pack_tokens code was written. The assembly is no longer faster than the C code. pack_tokens were the only optimizations for the armv5te targets so the targets will be removed after the test infrastructure has been updated. BUG=710 Change-Id: Ic785b167cd9f95eeff31c7c76b7b736c07fb30eb --- build/arm-msvs/obj_int_extract.bat | 18 - build/make/Android.mk | 62 +-- build/make/Makefile | 12 - build/make/configure.sh | 6 - build/make/gen_msvs_proj.sh | 42 -- build/make/gen_msvs_vcxproj.sh | 33 +- build/make/rtcd.pl | 7 +- build/x86-msvs/obj_int_extract.bat | 15 - configure | 2 - libs.mk | 50 +- solution.mk | 3 +- test/quantize_test.cc | 6 - vp8/common/rtcd_defs.pl | 12 +- vp8/encoder/arm/armv5te/boolhuff_armv5te.asm | 310 ------------ .../arm/armv5te/vp8_packtokens_armv5.asm | 317 ------------ .../armv5te/vp8_packtokens_mbrow_armv5.asm | 352 ------------- .../vp8_packtokens_partitions_armv5.asm | 471 ------------------ .../arm/armv6/vp8_fast_quantize_b_armv6.asm | 225 --------- vp8/encoder/arm/armv6/vp8_subtract_armv6.asm | 272 ---------- vp8/encoder/arm/boolhuff_arm.c | 41 -- vp8/encoder/bitstream.c | 12 +- vp8/encoder/bitstream.h | 31 +- vp8/encoder/encodeframe.c | 2 +- vp8/encoder/ethreading.c | 2 +- vp8/encoder/vp8_asm_enc_offsets.c | 77 --- vp8/vp8cx.mk | 4 - vp8/vp8cx_arm.mk | 11 - vpx_ports/arm_cpudetect.c | 29 +- vpx_scale/vpx_scale.mk | 4 - vpx_scale/vpx_scale_asm_offsets.c | 24 - 30 files changed, 26 insertions(+), 2426 deletions(-) delete mode 100644 build/arm-msvs/obj_int_extract.bat delete mode 100644 build/x86-msvs/obj_int_extract.bat delete mode 100644 vp8/encoder/arm/armv5te/boolhuff_armv5te.asm delete mode 100644 vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm delete mode 100644 vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm delete mode 100644 vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm delete mode 100644 vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm delete mode 100644 vp8/encoder/arm/armv6/vp8_subtract_armv6.asm delete mode 100644 vp8/encoder/arm/boolhuff_arm.c diff --git a/build/arm-msvs/obj_int_extract.bat b/build/arm-msvs/obj_int_extract.bat deleted file mode 100644 index c0987bcf7..000000000 --- a/build/arm-msvs/obj_int_extract.bat +++ /dev/null @@ -1,18 +0,0 @@ -REM Copyright (c) 2013 The WebM project authors. All Rights Reserved. -REM -REM Use of this source code is governed by a BSD-style license -REM that can be found in the LICENSE file in the root of the source -REM tree. An additional intellectual property rights grant can be found -REM in the file PATENTS. All contributing project authors may -REM be found in the AUTHORS file in the root of the source tree. -echo on - -REM Arguments: -REM %1 - Relative path to the directory containing the vp8 and vpx_scale -REM source directories. -REM %2 - Path to obj_int_extract.exe. -cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c" -%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm" - -cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c" -%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm" diff --git a/build/make/Android.mk b/build/make/Android.mk index 816334e04..d897b44cc 100644 --- a/build/make/Android.mk +++ b/build/make/Android.mk @@ -43,7 +43,7 @@ # will remove any NEON dependency. # To change to building armeabi, run ./libvpx/configure again, but with -# --target=arm5te-android-gcc and modify the Application.mk file to +# --target=armv6-android-gcc and modify the Application.mk file to # set APP_ABI := armeabi # # Running ndk-build will build libvpx and include it in your project. @@ -60,7 +60,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) include $(CONFIG_DIR)libs-armv7-android-gcc.mk LOCAL_ARM_MODE := arm else ifeq ($(TARGET_ARCH_ABI),armeabi) - include $(CONFIG_DIR)libs-armv5te-android-gcc.mk + include $(CONFIG_DIR)libs-armv6-android-gcc.mk LOCAL_ARM_MODE := arm else ifeq ($(TARGET_ARCH_ABI),arm64-v8a) include $(CONFIG_DIR)libs-armv8-android-gcc.mk @@ -91,51 +91,8 @@ LOCAL_CFLAGS := -O3 # like x86inc.asm and x86_abi_support.asm LOCAL_ASMFLAGS := -I$(LIBVPX_PATH) -# ----------------------------------------------------------------------------- -# Template : asm_offsets_template -# Arguments : 1: assembly offsets file to be created -# 2: c file to base assembly offsets on -# Returns : None -# Usage : $(eval $(call asm_offsets_template,, -# Rationale : Create offsets at compile time using for structures that are -# defined in c, but used in assembly functions. -# ----------------------------------------------------------------------------- -define asm_offsets_template - -_SRC:=$(2) -_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S - -_FLAGS = $$($$(my)CFLAGS) \ - $$(call get-src-file-target-cflags,$(2)) \ - $$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \ - $$(LOCAL_CFLAGS) \ - $$(NDK_APP_CFLAGS) \ - $$(call host-c-includes,$$($(my)C_INCLUDES)) \ - -DINLINE_ASM \ - -S \ - -_TEXT = "Compile $$(call get-src-file-text,$(2))" -_CC = $$(TARGET_CC) - -$$(eval $$(call ev-build-file)) - -$(1) : $$(_OBJ) $(2) - @mkdir -p $$(dir $$@) - @grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@ -endef - -# Use ads2gas script to convert from RVCT format to GAS format. This -# puts the processed file under $(ASM_CNV_PATH). Local clean rule -# to handle removing these -ifeq ($(CONFIG_VP8_ENCODER), yes) - ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm -endif -ifeq ($(HAVE_NEON_ASM), yes) - ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm -endif - .PRECIOUS: %.asm.s -$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND) +$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm @mkdir -p $(dir $@) @$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@ @@ -224,24 +181,11 @@ endif clean: @echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]" @$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS) - @$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND)) @$(RM) -r $(ASM_CNV_PATH) @$(RM) $(CLEAN-OBJS) include $(BUILD_SHARED_LIBRARY) -ifeq ($(HAVE_NEON), yes) - $(eval $(call asm_offsets_template,\ - $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \ - $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c)) -endif - -ifeq ($(CONFIG_VP8_ENCODER), yes) - $(eval $(call asm_offsets_template,\ - $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \ - $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c)) -endif - ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) $(call import-module,cpufeatures) endif diff --git a/build/make/Makefile b/build/make/Makefile index ed90397f0..da5721adb 100644 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -216,14 +216,6 @@ else $(qexec)cp $< $@ endif -# -# Rule to extract assembly constants from C sources -# -obj_int_extract: build/make/obj_int_extract.c - $(if $(quiet),@echo " [HOSTCC] $@") - $(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $< -CLEAN-OBJS += obj_int_extract - # # Utility functions # @@ -424,11 +416,7 @@ ifneq ($(call enabled,DIST-SRCS),) DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh - DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat - DIST-SRCS-$(CONFIG_MSVS) += build/arm-msvs/obj_int_extract.bat DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh - # Include obj_int_extract if we use offsets from *_asm_*_offsets - DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64) += build/make/obj_int_extract.c DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas_apple.pl DIST-SRCS-$(ARCH_ARM) += build/make/ads2armasm_ms.pl diff --git a/build/make/configure.sh b/build/make/configure.sh index 56e9f4406..67636887e 100644 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -822,18 +822,12 @@ process_common_toolchain() { soft_enable neon soft_enable neon_asm soft_enable media - soft_enable edsp soft_enable fast_unaligned ;; armv6) soft_enable media - soft_enable edsp soft_enable fast_unaligned ;; - armv5te) - soft_enable edsp - disable_feature fast_unaligned - ;; esac asm_conversion_cmd="cat" diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh index 790722593..dcce78255 100755 --- a/build/make/gen_msvs_proj.sh +++ b/build/make/gen_msvs_proj.sh @@ -295,22 +295,7 @@ generate_vcproj() { case "$target" in x86*) case "$name" in - obj_int_extract) - tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \ - RuntimeLibrary="$debug_runtime" \ - WarningLevel="3" \ - DebugInformationFormat="1" \ - $warn_64bit \ - ;; vpx) - tag Tool \ - Name="VCPreBuildEventTool" \ - CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \ - tag Tool \ Name="VCCLCompilerTool" \ Optimization="0" \ @@ -347,11 +332,6 @@ generate_vcproj() { case "$target" in x86*) case "$name" in - obj_int_extract) - tag Tool \ - Name="VCLinkerTool" \ - GenerateDebugInformation="true" \ - ;; *) tag Tool \ Name="VCLinkerTool" \ @@ -400,24 +380,7 @@ generate_vcproj() { case "$target" in x86*) case "$name" in - obj_int_extract) - tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="2" \ - FavorSizeorSpeed="1" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \ - RuntimeLibrary="$release_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="0" \ - $warn_64bit \ - ;; vpx) - tag Tool \ - Name="VCPreBuildEventTool" \ - CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \ - tag Tool \ Name="VCCLCompilerTool" \ Optimization="2" \ @@ -456,11 +419,6 @@ generate_vcproj() { case "$target" in x86*) case "$name" in - obj_int_extract) - tag Tool \ - Name="VCLinkerTool" \ - GenerateDebugInformation="true" \ - ;; *) tag Tool \ Name="VCLinkerTool" \ diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh index 56b9a3b50..643ebd634 100755 --- a/build/make/gen_msvs_vcxproj.sh +++ b/build/make/gen_msvs_vcxproj.sh @@ -262,15 +262,9 @@ case "$target" in asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)"" ;; arm*) + platforms[0]="ARM" asm_Debug_cmdline="armasm -nologo "%(FullPath)"" asm_Release_cmdline="armasm -nologo "%(FullPath)"" - if [ "$name" = "obj_int_extract" ]; then - # We don't want to build this tool for the target architecture, - # but for an architecture we can run locally during the build. - platforms[0]="Win32" - else - platforms[0]="ARM" - fi ;; *) die "Unsupported target $target!" ;; @@ -400,23 +394,13 @@ generate_vcxproj() { if [ "$hostplat" == "ARM" ]; then hostplat=Win32 fi - open_tag PreBuildEvent - tag_content Command "call obj_int_extract.bat "$src_path_bare" $hostplat\\\$(Configuration)" - close_tag PreBuildEvent fi open_tag ClCompile if [ "$config" = "Debug" ]; then opt=Disabled runtime=$debug_runtime curlibs=$debug_libs - case "$name" in - obj_int_extract) - debug=DEBUG - ;; - *) - debug=_DEBUG - ;; - esac + debug=_DEBUG else opt=MaxSpeed runtime=$release_runtime @@ -424,14 +408,7 @@ generate_vcxproj() { tag_content FavorSizeOrSpeed Speed debug=NDEBUG fi - case "$name" in - obj_int_extract) - extradefines=";_CONSOLE" - ;; - *) - extradefines=";$defines" - ;; - esac + extradefines=";$defines" tag_content Optimization $opt tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)" tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)" @@ -451,10 +428,6 @@ generate_vcxproj() { case "$proj_kind" in exe) open_tag Link - if [ "$name" != "obj_int_extract" ]; then - tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)" - tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)" - fi tag_content GenerateDebugInformation true # Console is the default normally, but if # AppContainerApplication is set, we need to override it. diff --git a/build/make/rtcd.pl b/build/make/rtcd.pl index 0872414cb..bfc91a585 100755 --- a/build/make/rtcd.pl +++ b/build/make/rtcd.pl @@ -379,14 +379,11 @@ if ($opts{arch} eq 'x86') { } close CONFIG_FILE; mips; -} elsif ($opts{arch} eq 'armv5te') { - @ALL_ARCHS = filter(qw/edsp/); - arm; } elsif ($opts{arch} eq 'armv6') { - @ALL_ARCHS = filter(qw/edsp media/); + @ALL_ARCHS = filter(qw/media/); arm; } elsif ($opts{arch} eq 'armv7') { - @ALL_ARCHS = filter(qw/edsp media neon_asm neon/); + @ALL_ARCHS = filter(qw/media neon_asm neon/); @REQUIRES = filter(keys %required ? keys %required : qw/media/); &require(@REQUIRES); arm; diff --git a/build/x86-msvs/obj_int_extract.bat b/build/x86-msvs/obj_int_extract.bat deleted file mode 100644 index dfa3b9083..000000000 --- a/build/x86-msvs/obj_int_extract.bat +++ /dev/null @@ -1,15 +0,0 @@ -REM Copyright (c) 2011 The WebM project authors. All Rights Reserved. -REM -REM Use of this source code is governed by a BSD-style license -REM that can be found in the LICENSE file in the root of the source -REM tree. An additional intellectual property rights grant can be found -REM in the file PATENTS. All contributing project authors may -REM be found in the AUTHORS file in the root of the source tree. -echo on - -REM Arguments: -REM %1 - Relative path to the directory containing the vp8 source directory. -REM %2 - Path to obj_int_extract.exe. -cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c" -%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm" - diff --git a/configure b/configure index 3ed976c83..fb45d7ccb 100755 --- a/configure +++ b/configure @@ -451,8 +451,6 @@ process_targets() { enabled child || write_common_config_banner enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h - # TODO: add host tools target (obj_int_extract, etc) - # For fat binaries, call configure recursively to configure for each # binary architecture to be included. if enabled universal; then diff --git a/libs.mk b/libs.mk index f9f2d8070..38b4b1112 100644 --- a/libs.mk +++ b/libs.mk @@ -17,32 +17,6 @@ else ASM:=.asm endif -# -# Calculate platform- and compiler-specific offsets for hand coded assembly -# -ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) -OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' -define asm_offsets_template -$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S - @echo " [CREATE] $$@" - $$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@ -$$(BUILD_PFX)$(2).S: $(2) -CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S -endef -else - ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) -define asm_offsets_template -$$(BUILD_PFX)$(1): obj_int_extract -$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o - @echo " [CREATE] $$@" - $$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@ -OBJS-yes += $$(BUILD_PFX)$(2).o -CLEAN-OBJS += $$(BUILD_PFX)$(1) -$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1) -endef -endif # rvct -endif # !gcc - # # Rule to generate runtime cpu detection files # @@ -212,26 +186,6 @@ CLEAN-OBJS += libvpx_srcs.txt ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) -obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat - @cp $^ $@ - -obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat -obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c - @echo " [CREATE] $@" - $(qexec)$(GEN_VCPROJ) \ - --exe \ - --target=$(TOOLCHAIN) \ - --name=obj_int_extract \ - --ver=$(CONFIG_VS_VERSION) \ - --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \ - --src-path-bare="$(SRC_PATH_BARE)" \ - $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ - --out=$@ $^ \ - -I. \ - -I"$(SRC_PATH_BARE)" \ - -PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX) - vpx.def: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ @@ -246,7 +200,7 @@ ASM_INCLUDES := \ vpx_config.asm \ vpx_ports/x86_abi_support.asm \ -vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX) +vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ $(if $(CONFIG_SHARED),--dll,--lib) \ @@ -377,7 +331,7 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm endif # -# Add assembler dependencies for configuration and offsets +# Add assembler dependencies for configuration. # $(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm diff --git a/solution.mk b/solution.mk index 2c8d29a2a..145adc0dd 100644 --- a/solution.mk +++ b/solution.mk @@ -9,7 +9,7 @@ ## # libvpx reverse dependencies (targets that depend on libvpx) -VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract) +VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest) VPX_RDEPS=$(foreach vcp,\ $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx) @@ -17,7 +17,6 @@ vpx.sln: $(wildcard *.$(VCPROJ_SFX)) @echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ $(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \ - --dep=vpx:obj_int_extract \ --dep=test_libvpx:gtest \ --ver=$(CONFIG_VS_VERSION)\ --out=$@ $^ diff --git a/test/quantize_test.cc b/test/quantize_test.cc index 3e717ec61..756d0f696 100644 --- a/test/quantize_test.cc +++ b/test/quantize_test.cc @@ -181,12 +181,6 @@ INSTANTIATE_TEST_CASE_P( &vp8_regular_quantize_b_c))); #endif // HAVE_SSE4_1 -#if HAVE_MEDIA -INSTANTIATE_TEST_CASE_P(MEDIA, QuantizeTest, - ::testing::Values(make_tuple(&vp8_fast_quantize_b_armv6, - &vp8_fast_quantize_b_c))); -#endif // HAVE_MEDIA - #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest, ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon, diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index 67560084c..63fde4c9c 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -454,8 +454,7 @@ add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *"; specialize qw/vp8_regular_quantize_b sse2 sse4_1/; add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *"; -specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/; -$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6; +specialize qw/vp8_fast_quantize_b sse2 ssse3 neon/; # # Block subtraction @@ -473,16 +472,13 @@ specialize qw/vp8_mbuverror mmx sse2/; $vp8_mbuverror_sse2=vp8_mbuverror_xmm; add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch"; -specialize qw/vp8_subtract_b mmx sse2 media neon/; -$vp8_subtract_b_media=vp8_subtract_b_armv6; +specialize qw/vp8_subtract_b mmx sse2 neon/; add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"; -specialize qw/vp8_subtract_mby mmx sse2 media neon/; -$vp8_subtract_mby_media=vp8_subtract_mby_armv6; +specialize qw/vp8_subtract_mby mmx sse2 neon/; add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"; -specialize qw/vp8_subtract_mbuv mmx sse2 media neon/; -$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6; +specialize qw/vp8_subtract_mbuv mmx sse2 neon/; # # Motion search diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm deleted file mode 100644 index 4abe818f1..000000000 --- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm +++ /dev/null @@ -1,310 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_start_encode| - EXPORT |vp8_encode_bool| - EXPORT |vp8_stop_encode| - EXPORT |vp8_encode_value| - IMPORT |vp8_validate_buffer_arm| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - - ; macro for validating write buffer position - ; needs vp8_writer in r0 - ; start shall not be in r1 - MACRO - VALIDATE_POS $start, $pos - push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call - ldr r2, [r0, #vp8_writer_buffer_end] - ldr r3, [r0, #vp8_writer_error] - mov r1, $pos - mov r0, $start - bl vp8_validate_buffer_arm - pop {r0-r3, r12, lr} - MEND - -; r0 BOOL_CODER *br -; r1 unsigned char *source -; r2 unsigned char *source_end -|vp8_start_encode| PROC - str r2, [r0, #vp8_writer_buffer_end] - mov r12, #0 - mov r3, #255 - mvn r2, #23 - str r12, [r0, #vp8_writer_lowvalue] - str r3, [r0, #vp8_writer_range] - str r2, [r0, #vp8_writer_count] - str r12, [r0, #vp8_writer_pos] - str r1, [r0, #vp8_writer_buffer] - bx lr - ENDP - -; r0 BOOL_CODER *br -; r1 int bit -; r2 int probability -|vp8_encode_bool| PROC - push {r4-r10, lr} - - mov r4, r2 - - ldr r2, [r0, #vp8_writer_lowvalue] - ldr r5, [r0, #vp8_writer_range] - ldr r3, [r0, #vp8_writer_count] - - sub r7, r5, #1 ; range-1 - - cmp r1, #0 - mul r6, r4, r7 ; ((range-1) * probability) - - mov r7, #1 - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8) - - addne r2, r2, r4 ; if (bit) lowvalue += split - subne r4, r5, r4 ; if (bit) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r1, [r7, r4] - cmpge r1, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r1, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r1, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r9, r1 ; validate_buffer at pos - - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - str r2, [r0, #vp8_writer_lowvalue] - str r5, [r0, #vp8_writer_range] - str r3, [r0, #vp8_writer_count] - pop {r4-r10, pc} - ENDP - -; r0 BOOL_CODER *br -|vp8_stop_encode| PROC - push {r4-r10, lr} - - ldr r2, [r0, #vp8_writer_lowvalue] - ldr r5, [r0, #vp8_writer_range] - ldr r3, [r0, #vp8_writer_count] - - mov r10, #32 - -stop_encode_loop - sub r7, r5, #1 ; range-1 - - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_se ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_se - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_se -token_zero_while_loop_se - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_se - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r1, [r7, r4] - cmpge r1, #0xff - beq token_zero_while_loop_se - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_se - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r1, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r1, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r9, r1 ; validate_buffer at pos - - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_se - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r10, r10, #1 - bne stop_encode_loop - - str r2, [r0, #vp8_writer_lowvalue] - str r5, [r0, #vp8_writer_range] - str r3, [r0, #vp8_writer_count] - pop {r4-r10, pc} - - ENDP - -; r0 BOOL_CODER *br -; r1 int data -; r2 int bits -|vp8_encode_value| PROC - push {r4-r12, lr} - - mov r10, r2 - - ldr r2, [r0, #vp8_writer_lowvalue] - ldr r5, [r0, #vp8_writer_range] - ldr r3, [r0, #vp8_writer_count] - - rsb r4, r10, #32 ; 32-n - - ; v is kept in r1 during the token pack loop - lsl r1, r1, r4 ; r1 = v << 32 - n - -encode_value_loop - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r1, r1, #1 ; bit = v >> n - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - addcs r2, r2, r4 ; if (bit) lowvalue += split - subcs r4, r5, r4 ; if (bit) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_ev ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_ev - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_ev -token_zero_while_loop_ev - mov r9, #0 - strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_ev - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop_ev - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r9, [r7, r4] ; w->buffer[x] - add r9, r9, #1 - strb r9, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_ev - rsb r4, r6, #24 ; 24-offset - ldr r9, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r9, r11 ; validate_buffer at pos - - strb r7, [r9, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_ev - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r10, r10, #1 - bne encode_value_loop - - str r2, [r0, #vp8_writer_lowvalue] - str r5, [r0, #vp8_writer_range] - str r3, [r0, #vp8_writer_count] - pop {r4-r12, pc} - ENDP - - END diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm deleted file mode 100644 index 90a141c62..000000000 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm +++ /dev/null @@ -1,317 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_tokens_armv5| - IMPORT |vp8_validate_buffer_arm| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - - - ; macro for validating write buffer position - ; needs vp8_writer in r0 - ; start shall not be in r1 - MACRO - VALIDATE_POS $start, $pos - push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call - ldr r2, [r0, #vp8_writer_buffer_end] - ldr r3, [r0, #vp8_writer_error] - mov r1, $pos - mov r0, $start - bl vp8_validate_buffer_arm - pop {r0-r3, r12, lr} - MEND - - -; r0 vp8_writer *w -; r1 const TOKENEXTRA *p -; r2 int xcount -; r3 vp8_coef_encodings -; s0 vp8_extra_bits -; s1 vp8_coef_tree -|vp8cx_pack_tokens_armv5| PROC - push {r4-r12, lr} - sub sp, sp, #16 - - ; Add size of xcount * sizeof (TOKENEXTRA) to get stop - ; sizeof (TOKENEXTRA) is 8 - add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA) - str r2, [sp, #0] - str r3, [sp, #8] ; save vp8_coef_encodings - ldr r2, [r0, #vp8_writer_lowvalue] - ldr r5, [r0, #vp8_writer_range] - ldr r3, [r0, #vp8_writer_count] - b check_p_lt_stop - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #8] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp8_token_value] ; v - ldr r8, [r4, #vp8_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #60] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #60] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #56] ; vp8_extra_bits - ; Add t * sizeof (vp8_extra_bit_struct) to get the desired - ; element. Here vp8_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp8_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp8_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp8_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp8_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp8_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp8_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp8_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp8_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #vp8_writer_pos] - - VALIDATE_POS r7, r12 ; validate_buffer at pos - - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - str r2, [r0, #vp8_writer_lowvalue] - str r5, [r0, #vp8_writer_range] - str r3, [r0, #vp8_writer_count] - add sp, sp, #16 - pop {r4-r12, pc} - ENDP - - END diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm deleted file mode 100644 index 3a8d17a81..000000000 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm +++ /dev/null @@ -1,352 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_mb_row_tokens_armv5| - IMPORT |vp8_validate_buffer_arm| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - - - ; macro for validating write buffer position - ; needs vp8_writer in r0 - ; start shall not be in r1 - MACRO - VALIDATE_POS $start, $pos - push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call - ldr r2, [r0, #vp8_writer_buffer_end] - ldr r3, [r0, #vp8_writer_error] - mov r1, $pos - mov r0, $start - bl vp8_validate_buffer_arm - pop {r0-r3, r12, lr} - MEND - -; r0 VP8_COMP *cpi -; r1 vp8_writer *w -; r2 vp8_coef_encodings -; r3 vp8_extra_bits -; s0 vp8_coef_tree - -|vp8cx_pack_mb_row_tokens_armv5| PROC - push {r4-r12, lr} - sub sp, sp, #24 - - ; Compute address of cpi->common.mb_rows - ldr r4, _VP8_COMP_common_ - ldr r6, _VP8_COMMON_MBrows_ - add r4, r0, r4 - - ldr r5, [r4, r6] ; load up mb_rows - - str r2, [sp, #20] ; save vp8_coef_encodings - str r5, [sp, #12] ; save mb_rows - str r3, [sp, #8] ; save vp8_extra_bits - - ldr r4, _VP8_COMP_tplist_ - add r4, r0, r4 - ldr r7, [r4, #0] ; dereference cpi->tp_list - - mov r0, r1 ; keep same as other loops - - ldr r2, [r0, #vp8_writer_lowvalue] - ldr r5, [r0, #vp8_writer_range] - ldr r3, [r0, #vp8_writer_count] - -mb_row_loop - - ldr r1, [r7, #tokenlist_start] - ldr r9, [r7, #tokenlist_stop] - str r9, [sp, #0] ; save stop for later comparison - str r7, [sp, #16] ; tokenlist address for next time - - b check_p_lt_stop - - ; actuall work gets done here! - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #20] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp8_token_value] ; v - ldr r8, [r4, #vp8_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #64] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #64] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #8] ; vp8_extra_bits - ; Add t * sizeof (vp8_extra_bit_struct) to get the desired - ; element. Here vp8_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp8_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp8_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp8_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp8_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp8_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp8_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp8_writer_pos] - mvn r3, #7 - ldr r7, [r0, #vp8_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #vp8_writer_pos] - - VALIDATE_POS r7, r12 ; validate_buffer at pos - - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - ldr r6, [sp, #12] ; mb_rows - ldr r7, [sp, #16] ; tokenlist address - subs r6, r6, #1 - add r7, r7, #TOKENLIST_SZ ; next element in the array - str r6, [sp, #12] - bne mb_row_loop - - str r2, [r0, #vp8_writer_lowvalue] - str r5, [r0, #vp8_writer_range] - str r3, [r0, #vp8_writer_count] - add sp, sp, #24 - pop {r4-r12, pc} - ENDP - -_VP8_COMP_common_ - DCD vp8_comp_common -_VP8_COMMON_MBrows_ - DCD vp8_common_mb_rows -_VP8_COMP_tplist_ - DCD vp8_comp_tplist - - END diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm deleted file mode 100644 index e9aa4958f..000000000 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm +++ /dev/null @@ -1,471 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8cx_pack_tokens_into_partitions_armv5| - IMPORT |vp8_validate_buffer_arm| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA |.text|, CODE, READONLY - - ; macro for validating write buffer position - ; needs vp8_writer in r0 - ; start shall not be in r1 - MACRO - VALIDATE_POS $start, $pos - push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call - ldr r2, [r0, #vp8_writer_buffer_end] - ldr r3, [r0, #vp8_writer_error] - mov r1, $pos - mov r0, $start - bl vp8_validate_buffer_arm - pop {r0-r3, r12, lr} - MEND - -; r0 VP8_COMP *cpi -; r1 unsigned char *cx_data -; r2 const unsigned char *cx_data_end -; r3 int num_part -; s0 vp8_coef_encodings -; s1 vp8_extra_bits, -; s2 const vp8_tree_index * - -|vp8cx_pack_tokens_into_partitions_armv5| PROC - push {r4-r12, lr} - sub sp, sp, #40 - - ; Compute address of cpi->common.mb_rows - ldr r4, _VP8_COMP_common_ - ldr r6, _VP8_COMMON_MBrows_ - add r4, r0, r4 - - ldr r5, [r4, r6] ; load up mb_rows - - str r5, [sp, #36] ; save mb_rows - str r1, [sp, #24] ; save ptr = cx_data - str r3, [sp, #20] ; save num_part - str r2, [sp, #8] ; save cx_data_end - - ldr r4, _VP8_COMP_tplist_ - add r4, r0, r4 - ldr r7, [r4, #0] ; dereference cpi->tp_list - str r7, [sp, #32] ; store start of cpi->tp_list - - ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi - add r0, r0, r11 - - mov r11, #0 - str r11, [sp, #28] ; i - -numparts_loop - ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer) - add r0, r2 ; bc[i + 1] - - ldr r10, [sp, #24] ; ptr - ldr r5, [sp, #36] ; move mb_rows to the counting section - subs r5, r5, r11 ; move start point with each partition - ; mb_rows starts at i - str r5, [sp, #12] - - ; Reset all of the VP8 Writer data for each partition that - ; is processed. - ; start_encode - - ldr r3, [sp, #8] - str r3, [r0, #vp8_writer_buffer_end] - - mov r2, #0 ; vp8_writer_lowvalue - mov r5, #255 ; vp8_writer_range - mvn r3, #23 ; vp8_writer_count - - str r2, [r0, #vp8_writer_pos] - str r10, [r0, #vp8_writer_buffer] - - ble end_partition ; if (mb_rows <= 0) end partition - -mb_row_loop - - ldr r1, [r7, #tokenlist_start] - ldr r9, [r7, #tokenlist_stop] - str r9, [sp, #0] ; save stop for later comparison - str r7, [sp, #16] ; tokenlist address for next time - - b check_p_lt_stop - - ; actual work gets done here! - -while_p_lt_stop - ldrb r6, [r1, #tokenextra_token] ; t - ldr r4, [sp, #80] ; vp8_coef_encodings - mov lr, #0 - add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t - ldr r9, [r1, #tokenextra_context_tree] ; pp - - ldrb r7, [r1, #tokenextra_skip_eob_node] - - ldr r6, [r4, #vp8_token_value] ; v - ldr r8, [r4, #vp8_token_len] ; n - - ; vp8 specific skip_eob_node - cmp r7, #0 - movne lr, #2 ; i = 2 - subne r8, r8, #1 ; --n - - rsb r4, r8, #32 ; 32-n - ldr r10, [sp, #88] ; vp8_coef_tree - - ; v is kept in r12 during the token pack loop - lsl r12, r6, r4 ; r12 = v << 32 - n - -; loop start -token_loop - ldrb r4, [r9, lr, asr #1] ; pp [i>>1] - sub r7, r5, #1 ; range-1 - - ; Decisions are made based on the bit value shifted - ; off of v, so set a flag here based on this. - ; This value is refered to as "bb" - lsls r12, r12, #1 ; bb = v >> n - mul r6, r4, r7 ; ((range-1) * pp[i>>1])) - - ; bb can only be 0 or 1. So only execute this statement - ; if bb == 1, otherwise it will act like i + 0 - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] - add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start -token_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++] - - ; r10 is used earlier in the loop, but r10 is used as - ; temp variable here. So after r10 is used, reload - ; vp8_coef_tree_dcd into r10 - ldr r10, [sp, #88] ; vp8_coef_tree - -token_count_lt_zero - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r8, r8, #1 ; --n - bne token_loop - - ldrb r6, [r1, #tokenextra_token] ; t - ldr r7, [sp, #84] ; vp8_extra_bits - ; Add t * sizeof (vp8_extra_bit_struct) to get the desired - ; element. Here vp8_extra_bit_struct == 16 - add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t - - ldr r4, [r12, #vp8_extra_bit_struct_base_val] - cmp r4, #0 - beq skip_extra_bits - -; if( b->base_val) - ldr r8, [r12, #vp8_extra_bit_struct_len] ; L - ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra - cmp r8, #0 ; if( L) - beq no_extra_bits - - ldr r9, [r12, #vp8_extra_bit_struct_prob] - asr r7, lr, #1 ; v=e>>1 - - ldr r10, [r12, #vp8_extra_bit_struct_tree] - str r10, [sp, #4] ; b->tree - - rsb r4, r8, #32 - lsl r12, r7, r4 - - mov lr, #0 ; i = 0 - -extra_bits_loop - ldrb r4, [r9, lr, asr #1] ; pp[i>>1] - sub r7, r5, #1 ; range-1 - lsls r12, r12, #1 ; v >> n - mul r6, r4, r7 ; (range-1) * pp[i>>1] - addcs lr, lr, #1 ; i + bb - - mov r7, #1 - ldrsb lr, [r10, lr] ; i = b->tree[i+bb] - add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) - - addcs r2, r2, r4 ; if (bb) lowvalue += split - subcs r4, r5, r4 ; if (bb) range = range-split - - clz r6, r4 - sub r6, r6, #24 - - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi extra_count_lt_zero ; if(count >= 0) - - sub r6, r6, r3 ; offset= shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl extra_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos - 1 - b extra_zero_while_start -extra_zero_while_loop - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -extra_zero_while_start - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq extra_zero_while_loop - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] - add r10, r10, #1 - strb r10, [r7, r4] -extra_high_bit_not_set - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) - ldr r10, [sp, #4] ; b->tree -extra_count_lt_zero - lsl r2, r2, r6 - - subs r8, r8, #1 ; --n - bne extra_bits_loop ; while (n) - -no_extra_bits - ldr lr, [r1, #4] ; e = p->Extra - add r4, r5, #1 ; range + 1 - tst lr, #1 - lsr r4, r4, #1 ; split = (range + 1) >> 1 - addne r2, r2, r4 ; lowvalue += split - subne r4, r5, r4 ; range = range-split - tst r2, #0x80000000 ; lowvalue & 0x80000000 - lsl r5, r4, #1 ; range <<= 1 - beq end_high_bit_not_set - - ldr r4, [r0, #vp8_writer_pos] - mov r7, #0 - sub r4, r4, #1 - b end_zero_while_start -end_zero_while_loop - strb r7, [r6, r4] - sub r4, r4, #1 ; x-- -end_zero_while_start - cmp r4, #0 - ldrge r6, [r0, #vp8_writer_buffer] - ldrb r12, [r6, r4] - cmpge r12, #0xff - beq end_zero_while_loop - - ldr r6, [r0, #vp8_writer_buffer] - ldrb r7, [r6, r4] - add r7, r7, #1 - strb r7, [r6, r4] -end_high_bit_not_set - adds r3, r3, #1 ; ++count - lsl r2, r2, #1 ; lowvalue <<= 1 - bne end_count_zero - - ldr r4, [r0, #vp8_writer_pos] - mvn r3, #7 ; count = -8 - ldr r7, [r0, #vp8_writer_buffer] - lsr r6, r2, #24 ; lowvalue >> 24 - add r12, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r12, [r0, #vp8_writer_pos] - - VALIDATE_POS r7, r12 ; validate_buffer at pos - - strb r6, [r7, r4] -end_count_zero -skip_extra_bits - add r1, r1, #TOKENEXTRA_SZ ; ++p -check_p_lt_stop - ldr r4, [sp, #0] ; stop - cmp r1, r4 ; while( p < stop) - bcc while_p_lt_stop - - ldr r10, [sp, #20] ; num_parts - mov r1, #TOKENLIST_SZ - mul r1, r10, r1 - - ldr r6, [sp, #12] ; mb_rows - ldr r7, [sp, #16] ; tokenlist address - subs r6, r6, r10 - add r7, r7, r1 ; next element in the array - str r6, [sp, #12] - bgt mb_row_loop - -end_partition - mov r12, #32 - -stop_encode_loop - sub r7, r5, #1 ; range-1 - - mov r4, r7, lsl #7 ; ((range-1) * 128) - - mov r7, #1 - add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) - - ; Counting the leading zeros is used to normalize range. - clz r6, r4 - sub r6, r6, #24 ; shift - - ; Flag is set on the sum of count. This flag is used later - ; to determine if count >= 0 - adds r3, r3, r6 ; count += shift - lsl r5, r4, r6 ; range <<= shift - bmi token_count_lt_zero_se ; if(count >= 0) - - sub r6, r6, r3 ; offset = shift - count - sub r4, r6, #1 ; offset-1 - lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) - bpl token_high_bit_not_set_se - - ldr r4, [r0, #vp8_writer_pos] ; x - sub r4, r4, #1 ; x = w->pos-1 - b token_zero_while_start_se -token_zero_while_loop_se - mov r10, #0 - strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 - sub r4, r4, #1 ; x-- -token_zero_while_start_se - cmp r4, #0 - ldrge r7, [r0, #vp8_writer_buffer] - ldrb r11, [r7, r4] - cmpge r11, #0xff - beq token_zero_while_loop_se - - ldr r7, [r0, #vp8_writer_buffer] - ldrb r10, [r7, r4] ; w->buffer[x] - add r10, r10, #1 - strb r10, [r7, r4] ; w->buffer[x] + 1 -token_high_bit_not_set_se - rsb r4, r6, #24 ; 24-offset - ldr r10, [r0, #vp8_writer_buffer] - lsr r7, r2, r4 ; lowvalue >> (24-offset) - ldr r4, [r0, #vp8_writer_pos] ; w->pos - lsl r2, r2, r6 ; lowvalue <<= offset - mov r6, r3 ; shift = count - add r11, r4, #1 ; w->pos++ - bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff - str r11, [r0, #vp8_writer_pos] - sub r3, r3, #8 ; count -= 8 - - VALIDATE_POS r10, r11 ; validate_buffer at pos - - strb r7, [r10, r4] ; w->buffer[w->pos++] - -token_count_lt_zero_se - lsl r2, r2, r6 ; lowvalue <<= shift - - subs r12, r12, #1 - bne stop_encode_loop - - ldr r4, [r0, #vp8_writer_pos] ; w->pos - ldr r12, [sp, #24] ; ptr - add r12, r12, r4 ; ptr += w->pos - str r12, [sp, #24] - - ldr r11, [sp, #28] ; i - ldr r10, [sp, #20] ; num_parts - - add r11, r11, #1 ; i++ - str r11, [sp, #28] - - ldr r7, [sp, #32] ; cpi->tp_list[i] - mov r1, #TOKENLIST_SZ - add r7, r7, r1 ; next element in cpi->tp_list - str r7, [sp, #32] ; cpi->tp_list[i+1] - - cmp r10, r11 - bgt numparts_loop - - add sp, sp, #40 - pop {r4-r12, pc} - ENDP - -_VP8_COMP_common_ - DCD vp8_comp_common -_VP8_COMMON_MBrows_ - DCD vp8_common_mb_rows -_VP8_COMP_tplist_ - DCD vp8_comp_tplist -_VP8_COMP_bc_ - DCD vp8_comp_bc -_vp8_writer_sz_ - DCD vp8_writer_sz - - END diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm deleted file mode 100644 index de35a1e13..000000000 --- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm +++ /dev/null @@ -1,225 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_fast_quantize_b_armv6| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 BLOCK *b -; r1 BLOCKD *d -|vp8_fast_quantize_b_armv6| PROC - stmfd sp!, {r1, r4-r11, lr} - - ldr r3, [r0, #vp8_block_coeff] ; coeff - ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast - ldr r5, [r0, #vp8_block_round] ; round - ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff - ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff - ldr r8, [r1, #vp8_blockd_dequant] ; dequant - - ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction - ; is used to update the counter so that - ; it can be used to mark nonzero - ; quantized coefficient pairs. - - mov r1, #0 ; flags for quantized coeffs - - ; PART 1: quantization and dequantization loop -loop - ldr r9, [r3], #4 ; [z1 | z0] - ldr r10, [r5], #4 ; [r1 | r0] - ldr r11, [r4], #4 ; [q1 | q0] - - ssat16 lr, #1, r9 ; [sz1 | sz0] - eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0] - ssub16 r9, r9, lr ; x = (z ^ sz) - sz - sadd16 r9, r9, r10 ; [x1+r1 | x0+r0] - - ldr r12, [r3], #4 ; [z3 | z2] - - smulbb r0, r9, r11 ; [(x0+r0)*q0] - smultt r9, r9, r11 ; [(x1+r1)*q1] - - ldr r10, [r5], #4 ; [r3 | r2] - - ssat16 r11, #1, r12 ; [sz3 | sz2] - eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] - pkhtb r0, r9, r0, asr #16 ; [y1 | y0] - ldr r9, [r4], #4 ; [q3 | q2] - ssub16 r12, r12, r11 ; x = (z ^ sz) - sz - - sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] - - eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)] - - smulbb r10, r12, r9 ; [(x2+r2)*q2] - smultt r12, r12, r9 ; [(x3+r3)*q3] - - ssub16 r0, r0, lr ; x = (y ^ sz) - sz - - cmp r0, #0 ; check if zero - orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs - - str r0, [r6], #4 ; *qcoeff++ = x - ldr r9, [r8], #4 ; [dq1 | dq0] - - pkhtb r10, r12, r10, asr #16 ; [y3 | y2] - eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)] - ssub16 r10, r10, r11 ; x = (y ^ sz) - sz - - cmp r10, #0 ; check if zero - orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs - - str r10, [r6], #4 ; *qcoeff++ = x - ldr r11, [r8], #4 ; [dq3 | dq2] - - smulbb r12, r0, r9 ; [x0*dq0] - smultt r0, r0, r9 ; [x1*dq1] - - smulbb r9, r10, r11 ; [x2*dq2] - smultt r10, r10, r11 ; [x3*dq3] - - lsls r2, r2, #2 ; update loop counter - strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] - strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1] - strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2] - strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3] - add r7, r7, #8 ; dqcoeff += 8 - bne loop - - ; PART 2: check position for eob... - ldr r11, [sp, #0] ; restore BLOCKD pointer - mov lr, #0 ; init eob - cmp r1, #0 ; coeffs after quantization? - ldr r12, [r11, #vp8_blockd_eob] - beq end ; skip eob calculations if all zero - - ldr r0, [r11, #vp8_blockd_qcoeff] - - ; check shortcut for nonzero qcoeffs - tst r1, #0x80 - bne quant_coeff_15_14 - tst r1, #0x20 - bne quant_coeff_13_11 - tst r1, #0x8 - bne quant_coeff_12_7 - tst r1, #0x40 - bne quant_coeff_10_9 - tst r1, #0x10 - bne quant_coeff_8_3 - tst r1, #0x2 - bne quant_coeff_6_5 - tst r1, #0x4 - bne quant_coeff_4_2 - b quant_coeff_1_0 - -quant_coeff_15_14 - ldrh r2, [r0, #30] ; rc=15, i=15 - mov lr, #16 - cmp r2, #0 - bne end - - ldrh r3, [r0, #28] ; rc=14, i=14 - mov lr, #15 - cmp r3, #0 - bne end - -quant_coeff_13_11 - ldrh r2, [r0, #22] ; rc=11, i=13 - mov lr, #14 - cmp r2, #0 - bne end - -quant_coeff_12_7 - ldrh r3, [r0, #14] ; rc=7, i=12 - mov lr, #13 - cmp r3, #0 - bne end - - ldrh r2, [r0, #20] ; rc=10, i=11 - mov lr, #12 - cmp r2, #0 - bne end - -quant_coeff_10_9 - ldrh r3, [r0, #26] ; rc=13, i=10 - mov lr, #11 - cmp r3, #0 - bne end - - ldrh r2, [r0, #24] ; rc=12, i=9 - mov lr, #10 - cmp r2, #0 - bne end - -quant_coeff_8_3 - ldrh r3, [r0, #18] ; rc=9, i=8 - mov lr, #9 - cmp r3, #0 - bne end - - ldrh r2, [r0, #12] ; rc=6, i=7 - mov lr, #8 - cmp r2, #0 - bne end - -quant_coeff_6_5 - ldrh r3, [r0, #6] ; rc=3, i=6 - mov lr, #7 - cmp r3, #0 - bne end - - ldrh r2, [r0, #4] ; rc=2, i=5 - mov lr, #6 - cmp r2, #0 - bne end - -quant_coeff_4_2 - ldrh r3, [r0, #10] ; rc=5, i=4 - mov lr, #5 - cmp r3, #0 - bne end - - ldrh r2, [r0, #16] ; rc=8, i=3 - mov lr, #4 - cmp r2, #0 - bne end - - ldrh r3, [r0, #8] ; rc=4, i=2 - mov lr, #3 - cmp r3, #0 - bne end - -quant_coeff_1_0 - ldrh r2, [r0, #2] ; rc=1, i=1 - mov lr, #2 - cmp r2, #0 - bne end - - mov lr, #1 ; rc=0, i=0 - -end - strb lr, [r12] - ldmfd sp!, {r1, r4-r11, pc} - - ENDP - -loop_count - DCD 0x1000000 - - END - diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm deleted file mode 100644 index 05746cf7f..000000000 --- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm +++ /dev/null @@ -1,272 +0,0 @@ -; -; Copyright (c) 2011 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_subtract_mby_armv6| - EXPORT |vp8_subtract_mbuv_armv6| - EXPORT |vp8_subtract_b_armv6| - - INCLUDE vp8_asm_enc_offsets.asm - - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -; r0 BLOCK *be -; r1 BLOCKD *bd -; r2 int pitch -|vp8_subtract_b_armv6| PROC - - stmfd sp!, {r4-r9} - - ldr r4, [r0, #vp8_block_base_src] - ldr r5, [r0, #vp8_block_src] - ldr r6, [r0, #vp8_block_src_diff] - - ldr r3, [r4] - ldr r7, [r0, #vp8_block_src_stride] - add r3, r3, r5 ; src = *base_src + src - ldr r8, [r1, #vp8_blockd_predictor] - - mov r9, #4 ; loop count - -loop_block - - ldr r0, [r3], r7 ; src - ldr r1, [r8], r2 ; pred - - uxtb16 r4, r0 ; [s2 | s0] - uxtb16 r5, r1 ; [p2 | p0] - uxtb16 r0, r0, ror #8 ; [s3 | s1] - uxtb16 r1, r1, ror #8 ; [p3 | p1] - - usub16 r4, r4, r5 ; [d2 | d0] - usub16 r5, r0, r1 ; [d3 | d1] - - subs r9, r9, #1 ; decrement loop counter - - pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] - pkhtb r1, r5, r4, asr #16 ; [d3 | d2] - - str r0, [r6, #0] ; diff - str r1, [r6, #4] ; diff - - add r6, r6, r2, lsl #1 ; update diff pointer - bne loop_block - - ldmfd sp!, {r4-r9} - mov pc, lr - - ENDP - - -; r0 short *diff -; r1 unsigned char *usrc -; r2 unsigned char *vsrc -; r3 int src_stride -; sp unsigned char *upred -; sp unsigned char *vpred -; sp int pred_stride -|vp8_subtract_mbuv_armv6| PROC - - stmfd sp!, {r4-r11} - - add r0, r0, #512 ; set *diff point to Cb - mov r4, #8 ; loop count - ldr r5, [sp, #32] ; upred - ldr r12, [sp, #40] ; pred_stride - - ; Subtract U block -loop_u - ldr r6, [r1] ; usrc (A) - ldr r7, [r5] ; upred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r1, #4] ; usrc (B) - ldr r11, [r5, #4] ; upred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - add r1, r1, r3 ; update usrc pointer - add r5, r5, r12 ; update upred pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (B) - - bne loop_u - - ldr r5, [sp, #36] ; vpred - mov r4, #8 ; loop count - - ; Subtract V block -loop_v - ldr r6, [r2] ; vsrc (A) - ldr r7, [r5] ; vpred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r2, #4] ; vsrc (B) - ldr r11, [r5, #4] ; vpred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - add r2, r2, r3 ; update vsrc pointer - add r5, r5, r12 ; update vpred pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (B) - - bne loop_v - - ldmfd sp!, {r4-r11} - bx lr - - ENDP - - -; r0 short *diff -; r1 unsigned char *src -; r2 int src_stride -; r3 unsigned char *pred -; sp int pred_stride -|vp8_subtract_mby_armv6| PROC - - stmfd sp!, {r4-r11} - ldr r12, [sp, #32] ; pred_stride - mov r4, #16 -loop - ldr r6, [r1] ; src (A) - ldr r7, [r3] ; pred (A) - - uxtb16 r8, r6 ; [s2 | s0] (A) - uxtb16 r9, r7 ; [p2 | p0] (A) - uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) - uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) - - usub16 r6, r8, r9 ; [d2 | d0] (A) - usub16 r7, r10, r11 ; [d3 | d1] (A) - - ldr r10, [r1, #4] ; src (B) - ldr r11, [r3, #4] ; pred (B) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) - - str r8, [r0], #4 ; diff (A) - uxtb16 r8, r10 ; [s2 | s0] (B) - str r9, [r0], #4 ; diff (A) - - uxtb16 r9, r11 ; [p2 | p0] (B) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) - - usub16 r6, r8, r9 ; [d2 | d0] (B) - usub16 r7, r10, r11 ; [d3 | d1] (B) - - ldr r10, [r1, #8] ; src (C) - ldr r11, [r3, #8] ; pred (C) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) - - str r8, [r0], #4 ; diff (B) - uxtb16 r8, r10 ; [s2 | s0] (C) - str r9, [r0], #4 ; diff (B) - - uxtb16 r9, r11 ; [p2 | p0] (C) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (C) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (C) - - usub16 r6, r8, r9 ; [d2 | d0] (C) - usub16 r7, r10, r11 ; [d3 | d1] (C) - - ldr r10, [r1, #12] ; src (D) - ldr r11, [r3, #12] ; pred (D) - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) - - str r8, [r0], #4 ; diff (C) - uxtb16 r8, r10 ; [s2 | s0] (D) - str r9, [r0], #4 ; diff (C) - - uxtb16 r9, r11 ; [p2 | p0] (D) - uxtb16 r10, r10, ror #8 ; [s3 | s1] (D) - uxtb16 r11, r11, ror #8 ; [p3 | p1] (D) - - usub16 r6, r8, r9 ; [d2 | d0] (D) - usub16 r7, r10, r11 ; [d3 | d1] (D) - - add r1, r1, r2 ; update src pointer - add r3, r3, r12 ; update pred pointer - - pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) - pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) - - str r8, [r0], #4 ; diff (D) - subs r4, r4, #1 ; update loop counter - str r9, [r0], #4 ; diff (D) - - bne loop - - ldmfd sp!, {r4-r11} - bx lr - - ENDP - - END - diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c deleted file mode 100644 index 17a941bfc..000000000 --- a/vp8/encoder/arm/boolhuff_arm.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/encoder/boolhuff.h" -#include "vpx/internal/vpx_codec_internal.h" - -const unsigned int vp8_prob_cost[256] = -{ - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046, - 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778, - 767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516, - 511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433, - 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307, - 304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257, - 255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174, - 172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139, - 137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77, - 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50, - 48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1 -}; - -int vp8_validate_buffer_arm(const unsigned char *start, - size_t len, - const unsigned char *end, - struct vpx_internal_error_info *error) -{ - return validate_buffer(start, len, end, error); -} diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 9d0e69cf4..f9096f9f0 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -159,7 +159,7 @@ static void write_split(vp8_writer *bc, int x) ); } -void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) +void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount) { const TOKENEXTRA *stop = p + xcount; unsigned int split; @@ -374,7 +374,7 @@ static void write_partition_size(unsigned char *cx_data, int size) } -static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, +static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data, unsigned char * cx_data_end, int num_part) { @@ -398,7 +398,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; int tokens = (int)(stop - p); - vp8_pack_tokens_c(w, p, tokens); + vp8_pack_tokens(w, p, tokens); } vp8_stop_encode(w); @@ -407,7 +407,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, } -static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w) +static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w) { int mb_row; @@ -417,7 +417,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w) const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; int tokens = (int)(stop - p); - vp8_pack_tokens_c(w, p, tokens); + vp8_pack_tokens(w, p, tokens); } } @@ -1676,7 +1676,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest pack_mb_row_tokens(cpi, &cpi->bc[1]); else #endif - pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count); + vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count); vp8_stop_encode(&cpi->bc[1]); diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h index 66f4bf67e..de6980551 100644 --- a/vp8/encoder/bitstream.h +++ b/vp8/encoder/bitstream.h @@ -16,36 +16,7 @@ extern "C" { #endif -#if HAVE_EDSP -void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount, - vp8_token *, - const vp8_extra_bit_struct *, - const vp8_tree_index *); -void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, - unsigned char * cx_data, - const unsigned char *cx_data_end, - int num_parts, - vp8_token *, - const vp8_extra_bit_struct *, - const vp8_tree_index *); -void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w, - vp8_token *, - const vp8_extra_bit_struct *, - const vp8_tree_index *); -# define pack_tokens(a,b,c) \ - vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) -# define pack_tokens_into_partitions(a,b,c,d) \ - vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) -# define pack_mb_row_tokens(a,b) \ - vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) -#else - -void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount); - -# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c) -# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d) -# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b) -#endif +void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount); #ifdef __cplusplus } // extern "C" diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 85813b69f..2a3f69cc8 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -574,7 +574,7 @@ void encode_mb_row(VP8_COMP *cpi, /* pack tokens for this MB */ { int tok_count = *tp - tp_start; - pack_tokens(w, tp_start, tok_count); + vp8_pack_tokens(w, tp_start, tok_count); } #endif /* Increment pointer into gf usage flags structure. */ diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 781467955..3598a7a69 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -261,7 +261,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) /* pack tokens for this MB */ { int tok_count = tp - tp_start; - pack_tokens(w, tp_start, tok_count); + vp8_pack_tokens(w, tp_start, tok_count); } #else cpi->tplist[mb_row].stop = tp; diff --git a/vp8/encoder/vp8_asm_enc_offsets.c b/vp8/encoder/vp8_asm_enc_offsets.c index a4169b32f..ce8a670fa 100644 --- a/vp8/encoder/vp8_asm_enc_offsets.c +++ b/vp8/encoder/vp8_asm_enc_offsets.c @@ -10,84 +10,7 @@ #include "vpx_ports/asm_offsets.h" -#include "vpx_config.h" -#include "block.h" -#include "vp8/common/blockd.h" -#include "onyx_int.h" -#include "treewriter.h" -#include "tokenize.h" BEGIN -/* regular quantize */ -DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff)); -DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin)); -DEFINE(vp8_block_round, offsetof(BLOCK, round)); -DEFINE(vp8_block_quant, offsetof(BLOCK, quant)); -DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast)); -DEFINE(vp8_block_zbin_extra, offsetof(BLOCK, zbin_extra)); -DEFINE(vp8_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost)); -DEFINE(vp8_block_quant_shift, offsetof(BLOCK, quant_shift)); - -DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff)); -DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant)); -DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); -DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob)); - -/* subtract */ -DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src)); -DEFINE(vp8_block_src, offsetof(BLOCK, src)); -DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff)); -DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride)); - -DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor)); - -/* pack tokens */ -DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue)); -DEFINE(vp8_writer_range, offsetof(vp8_writer, range)); -DEFINE(vp8_writer_count, offsetof(vp8_writer, count)); -DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos)); -DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer)); -DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end)); -DEFINE(vp8_writer_error, offsetof(vp8_writer, error)); - -DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token)); -DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra)); -DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree)); -DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node)); -DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA)); - -DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct)); - -DEFINE(vp8_token_value, offsetof(vp8_token, value)); -DEFINE(vp8_token_len, offsetof(vp8_token, Len)); - -DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree)); -DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob)); -DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len)); -DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val)); - -DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist)); -DEFINE(vp8_comp_common, offsetof(VP8_COMP, common)); -DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc)); -DEFINE(vp8_writer_sz , sizeof(vp8_writer)); - -DEFINE(tokenlist_start, offsetof(TOKENLIST, start)); -DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop)); -DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST)); - -DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows)); - END - -/* add asserts for any offset that is not supported by assembly code - * add asserts for any size that is not supported by assembly code - - * These are used in vp8cx_pack_tokens. They are hard coded so if their sizes - * change they will have to be adjusted. - */ - -#if HAVE_EDSP -ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8) -ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16) -#endif diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index a0dbdcfa9..5e4ef0598 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -75,7 +75,6 @@ VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c VP8_CX_SRCS-yes += encoder/temporal_filter.c VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h -VP8_CX_SRCS-yes += encoder/vp8_asm_enc_offsets.c ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c @@ -107,6 +106,3 @@ VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm endif VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes)) - -$(eval $(call asm_offsets_template,\ - vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/vp8_asm_enc_offsets.c)) diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index 2c2b87198..050030179 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -15,20 +15,9 @@ VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk # encoder VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c -#File list for edsp -# encoder -VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/boolhuff_arm.c -VP8_CX_SRCS_REMOVE-$(HAVE_EDSP) += encoder/boolhuff.c -VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/boolhuff_armv5te$(ASM) -VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM) -VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM) -VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM) - #File list for media # encoder -VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_subtract_armv6$(ASM) VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_short_fdct4x4_armv6$(ASM) -VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM) VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM) diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c index f03feffbc..8a4b8af96 100644 --- a/vpx_ports/arm_cpudetect.c +++ b/vpx_ports/arm_cpudetect.c @@ -49,9 +49,6 @@ int arm_cpu_caps(void) { return flags; } mask = arm_cpu_env_mask(); -#if HAVE_EDSP - flags |= HAS_EDSP; -#endif /* HAVE_EDSP */ #if HAVE_MEDIA flags |= HAS_MEDIA; #endif /* HAVE_MEDIA */ @@ -78,17 +75,6 @@ int arm_cpu_caps(void) { * instructions via their assembled hex code. * All of these instructions should be essentially nops. */ -#if HAVE_EDSP - if (mask & HAS_EDSP) { - __try { - /*PLD [r13]*/ - __emit(0xF5DDF000); - flags |= HAS_EDSP; - } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - /*Ignore exception.*/ - } - } -#endif /* HAVE_EDSP */ #if HAVE_MEDIA if (mask & HAS_MEDIA) __try { @@ -127,9 +113,6 @@ int arm_cpu_caps(void) { mask = arm_cpu_env_mask(); features = android_getCpuFeatures(); -#if HAVE_EDSP - flags |= HAS_EDSP; -#endif /* HAVE_EDSP */ #if HAVE_MEDIA flags |= HAS_MEDIA; #endif /* HAVE_MEDIA */ @@ -163,23 +146,15 @@ int arm_cpu_caps(void) { */ char buf[512]; while (fgets(buf, 511, fin) != NULL) { -#if HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM +#if HAVE_NEON || HAVE_NEON_ASM if (memcmp(buf, "Features", 8) == 0) { char *p; -#if HAVE_EDSP - p = strstr(buf, " edsp"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { - flags |= HAS_EDSP; - } -#endif /* HAVE_EDSP */ -#if HAVE_NEON || HAVE_NEON_ASM p = strstr(buf, " neon"); if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { flags |= HAS_NEON; } -#endif /* HAVE_NEON || HAVE_NEON_ASM */ } -#endif /* HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM */ +#endif /* HAVE_NEON || HAVE_NEON_ASM */ #if HAVE_MEDIA if (memcmp(buf, "CPU architecture:", 17) == 0) { int version; diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index 0a1594bd8..92d499175 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -5,7 +5,6 @@ SCALE_SRCS-yes += generic/vpx_scale.c SCALE_SRCS-yes += generic/yv12config.c SCALE_SRCS-yes += generic/yv12extend.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c -SCALE_SRCS-yes += vpx_scale_asm_offsets.c SCALE_SRCS-yes += vpx_scale_rtcd.c SCALE_SRCS-yes += vpx_scale_rtcd.pl @@ -14,7 +13,4 @@ SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) -$(eval $(call asm_offsets_template,\ - vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c)) - $(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl)) diff --git a/vpx_scale/vpx_scale_asm_offsets.c b/vpx_scale/vpx_scale_asm_offsets.c index caa9e80ff..ce8a670fa 100644 --- a/vpx_scale/vpx_scale_asm_offsets.c +++ b/vpx_scale/vpx_scale_asm_offsets.c @@ -9,32 +9,8 @@ */ -#include "./vpx_config.h" -#include "vpx/vpx_codec.h" #include "vpx_ports/asm_offsets.h" -#include "vpx_scale/yv12config.h" BEGIN -/* vpx_scale */ -DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); -DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); -DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); -DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); -DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); -DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); -DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); -DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); -DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); -DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); -DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS); - END - -/* add asserts for any offset that is not supported by assembly code */ -/* add asserts for any size that is not supported by assembly code */ - -#if HAVE_NEON -/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */ -ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32) -#endif