Rollback to working sqrt version.
git-svn-id: http://webrtc.googlecode.com/svn/trunk@154 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
d0159d8eb0
commit
dcdf311b6c
@ -53,6 +53,7 @@ LOCAL_SRC_FILES := add_sat_w16.c \
|
||||
resample_fractional.c \
|
||||
sin_table.c \
|
||||
sin_table_1024.c \
|
||||
spl_sqrt.c \
|
||||
spl_version.c \
|
||||
splitting_filter.c \
|
||||
sqrt_of_one_minus_x_squared.c \
|
||||
@ -60,14 +61,6 @@ LOCAL_SRC_FILES := add_sat_w16.c \
|
||||
sub_sat_w32.c \
|
||||
vector_scaling_operations.c
|
||||
|
||||
ifeq ($(TARGET_ARCH), arm)
|
||||
LOCAL_SRC_FILES += \
|
||||
spl_sqrt.s
|
||||
else
|
||||
LOCAL_SRC_FILES += \
|
||||
spl_sqrt.c
|
||||
endif
|
||||
|
||||
# Flags passed to both C and C++ files.
|
||||
MY_CFLAGS :=
|
||||
MY_CFLAGS_C :=
|
||||
|
@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_Sqrt().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
@ -16,23 +17,168 @@
|
||||
|
||||
#include "signal_processing_library.h"
|
||||
|
||||
#define iter1(N) \
|
||||
try1 = root + (1 << (N)); \
|
||||
if (value >= try1 << (N)) \
|
||||
{ \
|
||||
value -= try1 << (N); \
|
||||
root |= 2 << (N); \
|
||||
}
|
||||
WebRtc_Word32 WebRtcSpl_SqrtLocal(WebRtc_Word32 in);
|
||||
|
||||
// (out) Square root of input parameter
|
||||
WebRtc_Word32 WebRtcSpl_Sqrt(WebRtc_Word32 value) {
|
||||
// new routine for performance, 4 cycles/bit in ARM
|
||||
// output precision is 16 bits
|
||||
WebRtc_Word32 WebRtcSpl_SqrtLocal(WebRtc_Word32 in)
|
||||
{
|
||||
|
||||
WebRtc_Word32 root = 0, try1;
|
||||
iter1 (15); iter1 (14); iter1 (13); iter1 (12);
|
||||
iter1 (11); iter1 (10); iter1 ( 9); iter1 ( 8);
|
||||
iter1 ( 7); iter1 ( 6); iter1 ( 5); iter1 ( 4);
|
||||
iter1 ( 3); iter1 ( 2); iter1 ( 1); iter1 ( 0);
|
||||
return root >> 1;
|
||||
WebRtc_Word16 x_half, t16;
|
||||
WebRtc_Word32 A, B, x2;
|
||||
|
||||
/* The following block performs:
|
||||
y=in/2
|
||||
x=y-2^30
|
||||
x_half=x/2^31
|
||||
t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
|
||||
+ 0.875*((x_half)^5)
|
||||
*/
|
||||
|
||||
B = in;
|
||||
|
||||
B = WEBRTC_SPL_RSHIFT_W32(B, 1); // B = in/2
|
||||
B = B - ((WebRtc_Word32)0x40000000); // B = in/2 - 1/2
|
||||
x_half = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(B, 16);// x_half = x/2 = (in-1)/2
|
||||
B = B + ((WebRtc_Word32)0x40000000); // B = 1 + x/2
|
||||
B = B + ((WebRtc_Word32)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
|
||||
|
||||
x2 = ((WebRtc_Word32)x_half) * ((WebRtc_Word32)x_half) * 2; // A = (x/2)^2
|
||||
A = -x2; // A = -(x/2)^2
|
||||
B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
|
||||
|
||||
A = WEBRTC_SPL_RSHIFT_W32(A, 16);
|
||||
A = A * A * 2; // A = (x/2)^4
|
||||
t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16);
|
||||
B = B + WEBRTC_SPL_MUL_16_16(-20480, t16) * 2; // B = B - 0.625*A
|
||||
// After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
|
||||
|
||||
t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16);
|
||||
A = WEBRTC_SPL_MUL_16_16(x_half, t16) * 2; // A = (x/2)^5
|
||||
t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16);
|
||||
B = B + WEBRTC_SPL_MUL_16_16(28672, t16) * 2; // B = B + 0.875*A
|
||||
// After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
|
||||
|
||||
t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(x2, 16);
|
||||
A = WEBRTC_SPL_MUL_16_16(x_half, t16) * 2; // A = x/2^3
|
||||
|
||||
B = B + (A >> 1); // B = B + 0.5*A
|
||||
// After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
|
||||
|
||||
B = B + ((WebRtc_Word32)32768); // Round off bit
|
||||
|
||||
return B;
|
||||
}
|
||||
|
||||
WebRtc_Word32 WebRtcSpl_Sqrt(WebRtc_Word32 value)
|
||||
{
|
||||
/*
|
||||
Algorithm:
|
||||
|
||||
Six term Taylor Series is used here to compute the square root of a number
|
||||
y^0.5 = (1+x)^0.5 where x = y-1
|
||||
= 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5)
|
||||
0.5 <= x < 1
|
||||
|
||||
Example of how the algorithm works, with ut=sqrt(in), and
|
||||
with in=73632 and ut=271 (even shift value case):
|
||||
|
||||
in=73632
|
||||
y= in/131072
|
||||
x=y-1
|
||||
t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
|
||||
ut=t*(1/sqrt(2))*512
|
||||
|
||||
or:
|
||||
|
||||
in=73632
|
||||
in2=73632*2^14
|
||||
y= in2/2^31
|
||||
x=y-1
|
||||
t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
|
||||
ut=t*(1/sqrt(2))
|
||||
ut2=ut*2^9
|
||||
|
||||
which gives:
|
||||
|
||||
in = 73632
|
||||
in2 = 1206386688
|
||||
y = 0.56176757812500
|
||||
x = -0.43823242187500
|
||||
t = 0.74973506527313
|
||||
ut = 0.53014274874797
|
||||
ut2 = 2.714330873589594e+002
|
||||
|
||||
or:
|
||||
|
||||
in=73632
|
||||
in2=73632*2^14
|
||||
y=in2/2
|
||||
x=y-2^30
|
||||
x_half=x/2^31
|
||||
t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
|
||||
+ 0.875*((x_half)^5)
|
||||
ut=t*(1/sqrt(2))
|
||||
ut2=ut*2^9
|
||||
|
||||
which gives:
|
||||
|
||||
in = 73632
|
||||
in2 = 1206386688
|
||||
y = 603193344
|
||||
x = -470548480
|
||||
x_half = -0.21911621093750
|
||||
t = 0.74973506527313
|
||||
ut = 0.53014274874797
|
||||
ut2 = 2.714330873589594e+002
|
||||
|
||||
*/
|
||||
|
||||
WebRtc_Word16 x_norm, nshift, t16, sh;
|
||||
WebRtc_Word32 A;
|
||||
|
||||
WebRtc_Word16 k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)
|
||||
|
||||
A = value;
|
||||
|
||||
if (A == 0)
|
||||
return (WebRtc_Word32)0; // sqrt(0) = 0
|
||||
|
||||
sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
|
||||
A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
|
||||
if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
|
||||
{
|
||||
A = A + ((WebRtc_Word32)32768); // Round off bit
|
||||
} else
|
||||
{
|
||||
A = WEBRTC_SPL_WORD32_MAX;
|
||||
}
|
||||
|
||||
x_norm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); // x_norm = AH
|
||||
|
||||
nshift = WEBRTC_SPL_RSHIFT_W16(sh, 1); // nshift = sh>>1
|
||||
nshift = -nshift; // Negate the power for later de-normalization
|
||||
|
||||
A = (WebRtc_Word32)WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)x_norm, 16);
|
||||
A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
|
||||
A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
|
||||
|
||||
if ((-2 * nshift) == sh)
|
||||
{ // Even shift value case
|
||||
|
||||
t16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(A, 16); // t16 = AH
|
||||
|
||||
A = WEBRTC_SPL_MUL_16_16(k_sqrt_2, t16) * 2; // A = 1/sqrt(2)*t16
|
||||
A = A + ((WebRtc_Word32)32768); // Round off
|
||||
A = A & ((WebRtc_Word32)0x7fff0000); // Round off
|
||||
|
||||
A = WEBRTC_SPL_RSHIFT_W32(A, 15); // A = A>>16
|
||||
|
||||
} else
|
||||
{
|
||||
A = WEBRTC_SPL_RSHIFT_W32(A, 16); // A = A>>16
|
||||
}
|
||||
|
||||
A = A & ((WebRtc_Word32)0x0000ffff);
|
||||
A = (WebRtc_Word32)WEBRTC_SPL_SHIFT_W32(A, nshift); // De-normalize the result
|
||||
|
||||
return A;
|
||||
}
|
||||
|
@ -1,93 +0,0 @@
|
||||
@
|
||||
@ Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
@ sqrt() routine. 3 cycles/bit, total 51 cycles.
|
||||
@ IN : r0 32 bit unsigned integer
|
||||
@ OUT: r0 = INT (SQRT (r0)), precision is 16 bits
|
||||
@ TMP: r1, r2
|
||||
|
||||
.global WebRtcSpl_Sqrt
|
||||
|
||||
.align 2
|
||||
.section .text.WebRtcSpl_Sqrt:
|
||||
WebRtcSpl_Sqrt:
|
||||
.fnstart
|
||||
|
||||
MOV r1, #3 << 30
|
||||
MOV r2, #1 << 30
|
||||
|
||||
@ unroll for i = 0 .. 15
|
||||
|
||||
CMP r0, r2, ROR #2 * 0
|
||||
SUBHS r0, r0, r2, ROR #2 * 0
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 1
|
||||
SUBHS r0, r0, r2, ROR #2 * 1
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 2
|
||||
SUBHS r0, r0, r2, ROR #2 * 2
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 3
|
||||
SUBHS r0, r0, r2, ROR #2 * 3
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 4
|
||||
SUBHS r0, r0, r2, ROR #2 * 4
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 5
|
||||
SUBHS r0, r0, r2, ROR #2 * 5
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 6
|
||||
SUBHS r0, r0, r2, ROR #2 * 6
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 7
|
||||
SUBHS r0, r0, r2, ROR #2 * 7
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 8
|
||||
SUBHS r0, r0, r2, ROR #2 * 8
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 9
|
||||
SUBHS r0, r0, r2, ROR #2 * 9
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 10
|
||||
SUBHS r0, r0, r2, ROR #2 * 10
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 11
|
||||
SUBHS r0, r0, r2, ROR #2 * 11
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 12
|
||||
SUBHS r0, r0, r2, ROR #2 * 12
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 13
|
||||
SUBHS r0, r0, r2, ROR #2 * 13
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 14
|
||||
SUBHS r0, r0, r2, ROR #2 * 14
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
CMP r0, r2, ROR #2 * 15
|
||||
SUBHS r0, r0, r2, ROR #2 * 15
|
||||
ADC r2, r1, r2, LSL #1
|
||||
|
||||
BIC r0, r2, #3 << 30 @ for rounding add: CMP r0, r2 ADC r2, #1
|
||||
|
||||
.fnend
|
Loading…
x
Reference in New Issue
Block a user