Introduced an ARM version of WebRtcSpl_SqrtFloor(). The function's cycle count was reduced by ~30% in a real-time VOE test on an Android device (Nexus-S, ARMv7a).

// Fritz, I added you as a reviewer for the assembly files, just as a warm-up for future storms. :-) The assembly code was from public domain and there's little to touch. Review URL: https://webrtc-codereview.appspot.com/369017 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1627 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-02-07 17:15:15 +00:00
parent 9d9ad88ba5
commit 59f16ec993
5 changed files with 160 additions and 36 deletions
--- a/src/LICENSE_THIRD_PARTY
+++ b/src/LICENSE_THIRD_PARTY
@@ -4,6 +4,8 @@ licenses than the one provided in the LICENSE file in the root of the source
 tree.
 Files governed by third party licenses:
 common_audio/signal_processing/spl_sqrt_floor.c
 common_audio/signal_processing/spl_sqrt_floor.s
 modules/audio_coding/codecs/G711/main/source/g711.h
 modules/audio_coding/codecs/G711/main/source/g711.c
 modules/audio_coding/codecs/G722/main/source/g722_decode.h
--- a/src/common_audio/signal_processing/Android.mk
+++ b/src/common_audio/signal_processing/Android.mk
@@ -42,7 +42,6 @@ LOCAL_SRC_FILES := \
    resample_by_2_internal.c \
    resample_fractional.c \
    spl_sqrt.c \
    spl_sqrt_floor.c \
    spl_version.c \
    splitting_filter.c \
    sqrt_of_one_minus_x_squared.c \
@@ -75,6 +74,14 @@ LOCAL_SRC_FILES += \
    filter_ar_fast_q12.c
 endif
 ifeq ($(TARGET_ARCH),arm)
 LOCAL_SRC_FILES += \
    spl_sqrt_floor.s
 else
 LOCAL_SRC_FILES += \
    spl_sqrt_floor.c
 endif
 LOCAL_SHARED_LIBRARIES := libstlport
 ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
--- a/src/common_audio/signal_processing/spl_sqrt_floor.c
+++ b/src/common_audio/signal_processing/spl_sqrt_floor.c
@@ -1,21 +1,26 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Written by Wilco Dijkstra, 1996. Refer to file LICENSE under
- *
+ * trunk/third_party_mods/sqrt_floor.
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 /*
 * This file contains the function WebRtcSpl_SqrtFloor().
 * The description header can be found in signal_processing_library.h
 *
 * Minor modifications in code style for WebRTC, 2012.
 */
 #include "signal_processing_library.h"
 /*
 * Algorithm:
 * Successive approximation of the equation (root + delta) ^ 2 = N
 * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
 * Use delta = 2^i for i = 15 .. 0.
 *
 * Output precision is 16 bits. Note that for large input values (close
 * to 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
 * holds the most significant bit of the result, not a sign bit. Take
 * care if you need to cast the output to int16_t.
 *
 * If the input value is negative, it returns 0.
 */
 #define WEBRTC_SPL_SQRT_ITER(N)                 \
  try1 = root + (1 << (N));                     \
  if (value >= try1 << (N))                     \
@@ -24,30 +29,26 @@
    root |= 2 << (N);                           \
  }
-// (out) Square root of input parameter
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
 WebRtc_Word32 WebRtcSpl_SqrtFloor(WebRtc_Word32 value)
 {
-    // new routine for performance, 4 cycles/bit in ARM
+  int32_t root = 0, try1;
    // output precision is 16 bits
-    WebRtc_Word32 root = 0, try1;
+  WEBRTC_SPL_SQRT_ITER (15);
  WEBRTC_SPL_SQRT_ITER (14);
  WEBRTC_SPL_SQRT_ITER (13);
  WEBRTC_SPL_SQRT_ITER (12);
  WEBRTC_SPL_SQRT_ITER (11);
  WEBRTC_SPL_SQRT_ITER (10);
  WEBRTC_SPL_SQRT_ITER ( 9);
  WEBRTC_SPL_SQRT_ITER ( 8);
  WEBRTC_SPL_SQRT_ITER ( 7);
  WEBRTC_SPL_SQRT_ITER ( 6);
  WEBRTC_SPL_SQRT_ITER ( 5);
  WEBRTC_SPL_SQRT_ITER ( 4);
  WEBRTC_SPL_SQRT_ITER ( 3);
  WEBRTC_SPL_SQRT_ITER ( 2);
  WEBRTC_SPL_SQRT_ITER ( 1);
  WEBRTC_SPL_SQRT_ITER ( 0);
-    WEBRTC_SPL_SQRT_ITER (15);
+  return root >> 1;
    WEBRTC_SPL_SQRT_ITER (14);
    WEBRTC_SPL_SQRT_ITER (13);
    WEBRTC_SPL_SQRT_ITER (12);
    WEBRTC_SPL_SQRT_ITER (11);
    WEBRTC_SPL_SQRT_ITER (10);
    WEBRTC_SPL_SQRT_ITER ( 9);
    WEBRTC_SPL_SQRT_ITER ( 8);
    WEBRTC_SPL_SQRT_ITER ( 7);
    WEBRTC_SPL_SQRT_ITER ( 6);
    WEBRTC_SPL_SQRT_ITER ( 5);
    WEBRTC_SPL_SQRT_ITER ( 4);
    WEBRTC_SPL_SQRT_ITER ( 3);
    WEBRTC_SPL_SQRT_ITER ( 2);
    WEBRTC_SPL_SQRT_ITER ( 1);
    WEBRTC_SPL_SQRT_ITER ( 0);
    return root >> 1;
 }
--- a/src/common_audio/signal_processing/spl_sqrt_floor.s
+++ b/src/common_audio/signal_processing/spl_sqrt_floor.s
@@ -0,0 +1,88 @@
@ Written by Wilco Dijkstra, 1996. Refer to file LICENSE under
@ trunk/third_party_mods/sqrt_floor.
@
@ Minor modifications in code style for WebRTC, 2012.
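@ Output is bit-exact with the reference C code in spl_sqrt_floor.c for
@ non-negative inputs. The C code returns 0 for negative inputs, whereas
@ this routine treats r0 as an unsigned value.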
@ Input :             r0 32 bit unsigned integer
@ Output:             r0 = INT (SQRT (r0)), precision is 16 bits
@ Registers touched:  r1, r2
 @ WebRtcSpl_SqrtFloor: integer square root by successive approximation,
 @ fully unrolled into 16 three-instruction steps, one result bit per step
 @ (most significant bit first).
 @   In:       r0 = value; all comparisons use the unsigned "hs" condition
 @   Out:      r0 = 16-bit root
 @   Clobbers: r1, r2 and the condition flags (set by cmp)
 .global WebRtcSpl_SqrtFloor
 @ .align 2 requests 2^2 = 4-byte alignment in ARM GNU as.
 .align  2
 WebRtcSpl_SqrtFloor:
 @ .fnstart / .fnend bracket the function for the ARM unwind tables.
 .fnstart
  @ r1 = 0xC0000000: constant that is added in every adc below.
  mov    r1, #3 << 30
  @ r2 = 0x40000000: bits 31:30 hold the marker 01, the low bits collect
  @ the root, one new bit per step.
  mov    r2, #1 << 30
  @ Unrolled for i = 0 .. 15. Each step is:
  @   cmp   r0, r2, ror #2*i   - compare the remainder in r0 with the trial
  @                              value; rotating r2 right by 2*i places the
  @                              root bits found so far above the 01 marker.
  @   subhs r0, r0, <trial>    - if r0 >= trial (unsigned), subtract it.
  @                              No S suffix, so the carry from cmp is kept.
  @   adc   r2, r1, r2, lsl #1 - r2 = 2*r2 + 0xC0000000 + carry (mod 2^32):
  @                              shifts the root left by one, inserts the
  @                              carry from cmp as the new lowest root bit,
  @                              and restores bits 31:30 to 01.
  cmp    r0, r2, ror #2 * 0
  subhs  r0, r0, r2, ror #2 * 0
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 1
  subhs  r0, r0, r2, ror #2 * 1
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 2
  subhs  r0, r0, r2, ror #2 * 2
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 3
  subhs  r0, r0, r2, ror #2 * 3
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 4
  subhs  r0, r0, r2, ror #2 * 4
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 5
  subhs  r0, r0, r2, ror #2 * 5
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 6
  subhs  r0, r0, r2, ror #2 * 6
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 7
  subhs  r0, r0, r2, ror #2 * 7
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 8
  subhs  r0, r0, r2, ror #2 * 8
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 9
  subhs  r0, r0, r2, ror #2 * 9
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 10
  subhs  r0, r0, r2, ror #2 * 10
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 11
  subhs  r0, r0, r2, ror #2 * 11
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 12
  subhs  r0, r0, r2, ror #2 * 12
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 13
  subhs  r0, r0, r2, ror #2 * 13
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 14
  subhs  r0, r0, r2, ror #2 * 14
  adc    r2, r1, r2, lsl #1
  cmp    r0, r2, ror #2 * 15
  subhs  r0, r0, r2, ror #2 * 15
  adc    r2, r1, r2, lsl #1
  @ r2 now holds the 01 marker in bits 31:30 and the 16 root bits below it;
  @ clear the marker and deliver the root in r0.
  bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
  @ Return to the caller.
  bx lr
 .fnend
--- a/third_party_mods/sqrt_floor/LICENSE
+++ b/third_party_mods/sqrt_floor/LICENSE
@@ -0,0 +1,26 @@
 The following email record is related to source files spl_sqrt_floor.c
 and spl_sqrt_floor.s in trunk/src/common_audio/signal_processing/.
 From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
 Date: Fri, Jun 24, 2011 at 3:20 AM
 Subject: Re: sqrt routine
 To: Kevin Ma <kma@google.com>
 Hi Kevin,
 Thanks for asking. Those routines are public domain (originally posted to 
 comp.sys.arm a long time ago), so you can use them freely for any purpose.
 Cheers,
 Wilco
 ----- Original Message -----
 From: "Kevin Ma" <kma@google.com>
 To: <Wilco.Dijkstra@ntlworld.com>
 Sent: Thursday, June 23, 2011 11:44 PM
 Subject: Fwd: sqrt routine
 Hi Wilco,
 I saw your sqrt routine from several web sites, including
 http://www.finesse.demon.co.uk/steven/sqrt.html.
 Just wonder if there's any copyright information with your Successive
 approximation routines, or if I can freely use it for any purpose.
 Thanks.
 Kevin