From dcdf311b6ca57aaa61c6627d262cc14f2cc49d8a Mon Sep 17 00:00:00 2001 From: "henrika@google.com" Date: Thu, 7 Jul 2011 08:05:26 +0000 Subject: [PATCH] Rollback to working sqrt version. git-svn-id: http://webrtc.googlecode.com/svn/trunk@154 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../main/source/Android.mk | 9 +- .../main/source/spl_sqrt.c | 180 ++++++++++++++++-- .../main/source/spl_sqrt.s | 93 --------- 3 files changed, 164 insertions(+), 118 deletions(-) delete mode 100644 common_audio/signal_processing_library/main/source/spl_sqrt.s diff --git a/common_audio/signal_processing_library/main/source/Android.mk b/common_audio/signal_processing_library/main/source/Android.mk index b46cf11af..8b08676f8 100644 --- a/common_audio/signal_processing_library/main/source/Android.mk +++ b/common_audio/signal_processing_library/main/source/Android.mk @@ -53,6 +53,7 @@ LOCAL_SRC_FILES := add_sat_w16.c \ resample_fractional.c \ sin_table.c \ sin_table_1024.c \ + spl_sqrt.c \ spl_version.c \ splitting_filter.c \ sqrt_of_one_minus_x_squared.c \ @@ -60,14 +61,6 @@ LOCAL_SRC_FILES := add_sat_w16.c \ sub_sat_w32.c \ vector_scaling_operations.c -ifeq ($(TARGET_ARCH), arm) -LOCAL_SRC_FILES += \ - spl_sqrt.s -else -LOCAL_SRC_FILES += \ - spl_sqrt.c -endif - # Flags passed to both C and C++ files. MY_CFLAGS := MY_CFLAGS_C := diff --git a/common_audio/signal_processing_library/main/source/spl_sqrt.c b/common_audio/signal_processing_library/main/source/spl_sqrt.c index dd34fd7b2..cfe2cd3f3 100644 --- a/common_audio/signal_processing_library/main/source/spl_sqrt.c +++ b/common_audio/signal_processing_library/main/source/spl_sqrt.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ + /* * This file contains the function WebRtcSpl_Sqrt(). 
* The description header can be found in signal_processing_library.h @@ -16,23 +17,168 @@ #include "signal_processing_library.h" -#define iter1(N) \ - try1 = root + (1 << (N)); \ - if (value >= try1 << (N)) \ - { \ - value -= try1 << (N); \ - root |= 2 << (N); \ - } +WebRtc_Word32 WebRtcSpl_SqrtLocal(WebRtc_Word32 in); -// (out) Square root of input parameter -WebRtc_Word32 WebRtcSpl_Sqrt(WebRtc_Word32 value) { - // new routine for performance, 4 cycles/bit in ARM - // output precision is 16 bits +WebRtc_Word32 WebRtcSpl_SqrtLocal(WebRtc_Word32 in) +{ - WebRtc_Word32 root = 0, try1; - iter1 (15); iter1 (14); iter1 (13); iter1 (12); - iter1 (11); iter1 (10); iter1 ( 9); iter1 ( 8); - iter1 ( 7); iter1 ( 6); iter1 ( 5); iter1 ( 4); - iter1 ( 3); iter1 ( 2); iter1 ( 1); iter1 ( 0); - return root >> 1; + WebRtc_Word16 x_half, t16; + WebRtc_Word32 A, B, x2; + + /* The following block performs: + y=in/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + */ + + B = in; + + B = WEBRTC_SPL_RSHIFT_W32(B, 1); // B = in/2 + B = B - ((WebRtc_Word32)0x40000000); // B = in/2 - 1/2 + x_half = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(B, 16);// x_half = x/2 = (in-1)/2 + B = B + ((WebRtc_Word32)0x40000000); // B = 1 + x/2 + B = B + ((WebRtc_Word32)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) + + x2 = ((WebRtc_Word32)x_half) * ((WebRtc_Word32)x_half) * 2; // x2 = (x/2)^2 + A = -x2; // A = -(x/2)^2 + B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 + + A = WEBRTC_SPL_RSHIFT_W32(A, 16); + A = A * A * 2; // A = (x/2)^4 + t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); + B = B + WEBRTC_SPL_MUL_16_16(-20480, t16) * 2; // B = B - 0.625*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + + t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); + A = WEBRTC_SPL_MUL_16_16(x_half, t16) * 2; // A = (x/2)^5 + t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); + B = B + WEBRTC_SPL_MUL_16_16(28672, 
t16) * 2; // B = B + 0.875*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(x2, 16); + A = WEBRTC_SPL_MUL_16_16(x_half, t16) * 2; // A = (x/2)^3 + + B = B + (A >> 1); // B = B + 0.5*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + B = B + ((WebRtc_Word32)32768); // Round off bit + + return B; +} + +WebRtc_Word32 WebRtcSpl_Sqrt(WebRtc_Word32 value) +{ + /* + Algorithm: + + Six term Taylor Series is used here to compute the square root of a number + y^0.5 = (1+x)^0.5 where x = y-1 + = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5) + 0.5 <= y < 1 + + Example of how the algorithm works, with ut=sqrt(in), and + with in=73632 and ut=271 (even shift value case): + + in=73632 + y= in/131072 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2))*512 + + or: + + in=73632 + in2=73632*2^14 + y= in2/2^31 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 0.56176757812500 + x = -0.43823242187500 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + or: + + in=73632 + in2=73632*2^14 + y=in2/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 603193344 + x = -470548480 + x_half = -0.21911621093750 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + */ + + WebRtc_Word16 x_norm, nshift, t16, sh; + WebRtc_Word32 A; + + WebRtc_Word16 k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) + + A = value; + + if (A == 0) + return (WebRtc_Word32)0; // sqrt(0) = 0 + + sh = WebRtcSpl_NormW32(A); // # shifts to normalize A + A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize 
A + if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) + { + A = A + ((WebRtc_Word32)32768); // Round off bit + } else + { + A = WEBRTC_SPL_WORD32_MAX; + } + + x_norm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); // x_norm = AH + + nshift = WEBRTC_SPL_RSHIFT_W16(sh, 1); // nshift = sh>>1 + nshift = -nshift; // Negate the power for later de-normalization + + A = (WebRtc_Word32)WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)x_norm, 16); + A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) + A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) + + if ((-2 * nshift) == sh) + { // Even shift value case + + t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); // t16 = AH + + A = WEBRTC_SPL_MUL_16_16(k_sqrt_2, t16) * 2; // A = 1/sqrt(2)*t16 + A = A + ((WebRtc_Word32)32768); // Round off + A = A & ((WebRtc_Word32)0x7fff0000); // Round off + + A = WEBRTC_SPL_RSHIFT_W32(A, 15); // A = A>>15 + + } else + { + A = WEBRTC_SPL_RSHIFT_W32(A, 16); // A = A>>16 + } + + A = A & ((WebRtc_Word32)0x0000ffff); + A = (WebRtc_Word32)WEBRTC_SPL_SHIFT_W32(A, nshift); // De-normalize the result + + return A; } diff --git a/common_audio/signal_processing_library/main/source/spl_sqrt.s b/common_audio/signal_processing_library/main/source/spl_sqrt.s deleted file mode 100644 index f546fce4a..000000000 --- a/common_audio/signal_processing_library/main/source/spl_sqrt.s +++ /dev/null @@ -1,93 +0,0 @@ -@ -@ Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. - -@ sqrt() routine. 3 cycles/bit, total 51 cycles. 
-@ IN : r0 32 bit unsigned integer -@ OUT: r0 = INT (SQRT (r0)), precision is 16 bits -@ TMP: r1, r2 - -.global WebRtcSpl_Sqrt - -.align 2 -.section .text.WebRtcSpl_Sqrt: -WebRtcSpl_Sqrt: -.fnstart - - MOV r1, #3 << 30 - MOV r2, #1 << 30 - - @ unroll for i = 0 .. 15 - - CMP r0, r2, ROR #2 * 0 - SUBHS r0, r0, r2, ROR #2 * 0 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 1 - SUBHS r0, r0, r2, ROR #2 * 1 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 2 - SUBHS r0, r0, r2, ROR #2 * 2 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 3 - SUBHS r0, r0, r2, ROR #2 * 3 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 4 - SUBHS r0, r0, r2, ROR #2 * 4 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 5 - SUBHS r0, r0, r2, ROR #2 * 5 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 6 - SUBHS r0, r0, r2, ROR #2 * 6 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 7 - SUBHS r0, r0, r2, ROR #2 * 7 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 8 - SUBHS r0, r0, r2, ROR #2 * 8 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 9 - SUBHS r0, r0, r2, ROR #2 * 9 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 10 - SUBHS r0, r0, r2, ROR #2 * 10 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 11 - SUBHS r0, r0, r2, ROR #2 * 11 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 12 - SUBHS r0, r0, r2, ROR #2 * 12 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 13 - SUBHS r0, r0, r2, ROR #2 * 13 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 14 - SUBHS r0, r0, r2, ROR #2 * 14 - ADC r2, r1, r2, LSL #1 - - CMP r0, r2, ROR #2 * 15 - SUBHS r0, r0, r2, ROR #2 * 15 - ADC r2, r1, r2, LSL #1 - - BIC r0, r2, #3 << 30 @ for rounding add: CMP r0, r2 ADC r2, #1 - -.fnend