diff --git a/build/make/Makefile b/build/make/Makefile index 62d139ea4..5e3c904d9 100755 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -255,7 +255,7 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),) endif # -# Configuration dependant rules +# Configuration dependent rules # $(call pairmap,install_map_templates,$(INSTALL_MAPS)) @@ -332,7 +332,7 @@ ifneq ($(call enabled,DIST-SRCS),) DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/yasm.rules DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh # - # This isn't really ARCH_ARM dependent, it's dependant on whether we're + # This isn't really ARCH_ARM dependent, it's dependent on whether we're # using assembly code or not (CONFIG_OPTIMIZATIONS maybe). Just use # this for now. DIST-SRCS-$(ARCH_ARM) += build/make/obj_int_extract.c diff --git a/build/make/configure.sh b/build/make/configure.sh index d504008a4..a48fd9faf 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -83,7 +83,7 @@ Build options: ${toggle_werror} treat warnings as errors, if possible (not available with all compilers) ${toggle_optimizations} turn on/off compiler optimization flags - ${toggle_pic} turn on/off Position Independant Code + ${toggle_pic} turn on/off Position Independent Code ${toggle_ccache} turn on/off compiler cache ${toggle_debug} enable/disable debug mode ${toggle_gprof} enable/disable gprof profiling instrumentation @@ -957,7 +957,7 @@ process_common_toolchain() { enabled small && check_add_cflags -O2 || check_add_cflags -O3 fi - # Position Independant Code (PIC) support, for building relocatable + # Position Independent Code (PIC) support, for building relocatable # shared objects enabled gcc && enabled pic && check_add_cflags -fPIC diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index 5ba14f375..a661a89a4 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -35,15 +35,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) 
cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/ - /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/ + cpi->rtcd.variance.var8x8 = vp8_variance8x8_armv6; + /*cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;*/ cpi->rtcd.variance.var16x16 = vp8_variance16x16_armv6; - /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/ + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_armv6; + /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6; cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; diff --git a/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm new file mode 100644 index 000000000..7daecb925 --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm @@ -0,0 +1,95 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + EXPORT |vp8_variance8x8_armv6| + + ARM + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vp8_variance8x8_armv6| PROC + + push {r4-r10, lr} + mov r12, #8 ; set loop counter to 8 (=block height) + mov r4, #0 ; initialize sum = 0 + mov r5, #0 ; initialize sse = 0 + +loop + ; 1st 4 pixels + ldr r6, [r0, #0x0] ; load 4 src pixels + ldr r7, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r8, r6, r7 ; calculate difference + sel r10, r8, lr ; select bytes with positive difference + usub8 r9, r7, r6 ; calculate difference with reversed operands + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r6, r10, lr ; calculate sum of positive differences + usad8 r7, r8, lr ; calculate sum of negative differences + orr r8, r8, r10 ; differences of all 4 pixels + ; calculate total sum + add r4, r4, r6 ; add positive differences to sum + sub r4, r4, r7 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r7, r8 ; byte (two pixels) to halfwords + uxtb16 r10, r8, ror #8 ; another two pixels to halfwords + smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r6, [r0, #0x4] ; load 4 src pixels + ldr r7, [r2, #0x4] ; load 4 ref pixels + smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) + + usub8 r8, r6, r7 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r10, r8, lr ; select bytes with positive difference + usub8 r9, r7, r6 ; calculate difference with reversed operands + add r2, r2, r3 ; set ref_ptr to next row + sel r8, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r6, r10, lr ; calculate sum of positive differences + usad8 r7, r8, lr ; calculate sum of negative differences + orr r8, r8, r10 ; differences of all 4 pixels + + ; calculate total sum + 
add r4, r4, r6 ; add positive differences to sum + sub r4, r4, r7 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r7, r8 ; byte (two pixels) to halfwords + uxtb16 r10, r8, ror #8 ; another two pixels to halfwords + smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1) + subs r12, r12, #1 ; next row + smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2) + + bne loop + + ; return stuff + ldr r8, [sp, #32] ; get address of sse + mul r1, r4, r4 ; sum * sum + str r5, [r8] ; store sse + sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6)) + + pop {r4-r10, pc} + + ENDP + + END diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c index 64d76bcf8..ed1fb16d5 100644 --- a/vp8/encoder/arm/variance_arm.c +++ b/vp8/encoder/arm/variance_arm.c @@ -15,6 +15,34 @@ #if HAVE_ARMV6 +unsigned int vp8_sub_pixel_variance8x8_armv6 +( + const unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse +) +{ + unsigned short first_pass[10*8]; + unsigned char second_pass[8*8]; + const short *HFilter, *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, + src_pixels_per_line, + 9, 8, HFilter); + vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, + 8, 8, 8, VFilter); + + return vp8_variance8x8_armv6(second_pass, 8, dst_ptr, + dst_pixels_per_line, sse); +} + unsigned int vp8_sub_pixel_variance16x16_armv6 ( const unsigned char *src_ptr, diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index 7ad7c76d3..86de27476 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -16,7 +16,9 @@ extern prototype_sad(vp8_sad16x16_armv6); extern prototype_variance(vp8_variance16x16_armv6); +extern prototype_variance(vp8_variance8x8_armv6); extern 
prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); +extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_armv6); extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6); extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6); extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6); @@ -30,12 +32,18 @@ extern prototype_variance(vp8_mse16x16_armv6); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6 +#undef vp8_variance_subpixvar8x8 +#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_armv6 + #undef vp8_variance_var16x16 #define vp8_variance_var16x16 vp8_variance16x16_armv6 #undef vp8_variance_mse16x16 #define vp8_variance_mse16x16 vp8_mse16x16_armv6 +#undef vp8_variance_var8x8 +#define vp8_variance_var8x8 vp8_variance8x8_armv6 + #undef vp8_variance_halfpixvar16x16_h #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6 diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index b07ee8ffb..a11e1cad2 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -38,6 +38,7 @@ VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance8x8_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM) #File list for neon diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h index 4d58bf869..d92e165ff 100644 --- a/vpx/vpx_codec.h +++ b/vpx/vpx_codec.h @@ -498,7 +498,7 @@ extern "C" { * Iterates over a list of the segments to allocate. The iterator storage * should be initialized to NULL to start the iteration. Iteration is complete * when this function returns VPX_CODEC_LIST_END. 
The amount of memory needed to - * allocate is dependant upon the size of the encoded stream. In cases where the + * allocate is dependent upon the size of the encoded stream. In cases where the * stream is not available at allocation time, a fixed size must be requested. * The codec will not be able to operate on streams larger than the size used at * allocation time. diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h index 0fec32777..8adc1b998 100644 --- a/vpx/vpx_decoder_compat.h +++ b/vpx/vpx_decoder_compat.h @@ -527,7 +527,7 @@ extern "C" { * Iterates over a list of the segments to allocate. The iterator storage * should be initialized to NULL to start the iteration. Iteration is complete * when this function returns VPX_DEC_LIST_END. The amount of memory needed to - * allocate is dependant upon the size of the encoded stream. This means that + * allocate is dependent upon the size of the encoded stream. This means that * the stream info structure must be known at allocation time. It can be * populated with the vpx_dec_peek_stream_info() function. 
In cases where the * stream to be decoded is not available at allocation time, a fixed size must diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependent.c similarity index 100% rename from vpx_scale/arm/scalesystemdependant.c rename to vpx_scale/arm/scalesystemdependent.c diff --git a/vpx_scale/generic/scalesystemdependant.c b/vpx_scale/generic/scalesystemdependent.c similarity index 100% rename from vpx_scale/generic/scalesystemdependant.c rename to vpx_scale/generic/scalesystemdependent.c diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk index f4ab258ed..edb5419c3 100644 --- a/vpx_scale/vpx_scale.mk +++ b/vpx_scale/vpx_scale.mk @@ -6,13 +6,13 @@ SCALE_SRCS-yes += vpxscale.h SCALE_SRCS-yes += generic/vpxscale.c SCALE_SRCS-yes += generic/yv12config.c SCALE_SRCS-yes += generic/yv12extend.c -SCALE_SRCS-yes += generic/scalesystemdependant.c +SCALE_SRCS-yes += generic/scalesystemdependent.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c #arm -SCALE_SRCS-$(HAVE_ARMV7) += arm/scalesystemdependant.c +SCALE_SRCS-$(HAVE_ARMV7) += arm/scalesystemdependent.c SCALE_SRCS-$(HAVE_ARMV7) += arm/yv12extend_arm.c -SCALE_SRCS_REMOVE-$(HAVE_ARMV7) += generic/scalesystemdependant.c +SCALE_SRCS_REMOVE-$(HAVE_ARMV7) += generic/scalesystemdependent.c #neon SCALE_SRCS-$(HAVE_ARMV7) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) diff --git a/vpx_scale/win32/scalesystemdependant.c b/vpx_scale/win32/scalesystemdependent.c similarity index 97% rename from vpx_scale/win32/scalesystemdependant.c rename to vpx_scale/win32/scalesystemdependent.c index eab741f83..19e61c3a5 100644 --- a/vpx_scale/win32/scalesystemdependant.c +++ b/vpx_scale/win32/scalesystemdependent.c @@ -11,9 +11,9 @@ /**************************************************************************** * -* Module Title : system_dependant.c +* Module Title : system_dependent.c * -* Description : Miscellaneous system dependant functions +* Description : Miscellaneous system 
dependent functions * ****************************************************************************/