Introduced an ARM version of WebRtcSpl_SqrtFloor(). Function cycles reduced by ~30% in a real-time VOE test on an Android device (Nexus-S, ARMv7a).
// Fritz, I added you as a reviewer for the assembly files, just as a warm-up for future storms. :-) The assembly code is from the public domain and there's little to touch. Review URL: https://webrtc-codereview.appspot.com/369017 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1627 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
9d9ad88ba5
commit
59f16ec993
@ -4,6 +4,8 @@ licenses than the one provided in the LICENSE file in the root of the source
|
|||||||
tree.
|
tree.
|
||||||
|
|
||||||
Files governed by third party licenses:
|
Files governed by third party licenses:
|
||||||
|
common_audio/signal_processing/spl_sqrt_floor.c
|
||||||
|
common_audio/signal_processing/spl_sqrt_floor.s
|
||||||
modules/audio_coding/codecs/G711/main/source/g711.h
|
modules/audio_coding/codecs/G711/main/source/g711.h
|
||||||
modules/audio_coding/codecs/G711/main/source/g711.c
|
modules/audio_coding/codecs/G711/main/source/g711.c
|
||||||
modules/audio_coding/codecs/G722/main/source/g722_decode.h
|
modules/audio_coding/codecs/G722/main/source/g722_decode.h
|
||||||
|
@ -42,7 +42,6 @@ LOCAL_SRC_FILES := \
|
|||||||
resample_by_2_internal.c \
|
resample_by_2_internal.c \
|
||||||
resample_fractional.c \
|
resample_fractional.c \
|
||||||
spl_sqrt.c \
|
spl_sqrt.c \
|
||||||
spl_sqrt_floor.c \
|
|
||||||
spl_version.c \
|
spl_version.c \
|
||||||
splitting_filter.c \
|
splitting_filter.c \
|
||||||
sqrt_of_one_minus_x_squared.c \
|
sqrt_of_one_minus_x_squared.c \
|
||||||
@ -75,6 +74,14 @@ LOCAL_SRC_FILES += \
|
|||||||
filter_ar_fast_q12.c
|
filter_ar_fast_q12.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET_ARCH),arm)
|
||||||
|
LOCAL_SRC_FILES += \
|
||||||
|
spl_sqrt_floor.s
|
||||||
|
else
|
||||||
|
LOCAL_SRC_FILES += \
|
||||||
|
spl_sqrt_floor.c
|
||||||
|
endif
|
||||||
|
|
||||||
LOCAL_SHARED_LIBRARIES := libstlport
|
LOCAL_SHARED_LIBRARIES := libstlport
|
||||||
|
|
||||||
ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
|
ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
|
||||||
|
@ -1,21 +1,26 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Written by Wilco Dijkstra, 1996. Refer to file LICENSE under
|
||||||
*
|
* trunk/third_party_mods/sqrt_floor.
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This file contains the function WebRtcSpl_SqrtFloor().
|
|
||||||
* The description header can be found in signal_processing_library.h
|
|
||||||
*
|
*
|
||||||
|
* Minor modifications in code style for WebRTC, 2012.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "signal_processing_library.h"
|
#include "signal_processing_library.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Algorithm:
|
||||||
|
* Successive approximation of the equation (root + delta) ^ 2 = N
|
||||||
|
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
|
||||||
|
* Use delta = 2^i for i = 15 .. 0.
|
||||||
|
*
|
||||||
|
* Output precision is 16 bits. Note for large input values (close to
|
||||||
|
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
|
||||||
|
* contains the MSB information (a non-sign value). Take care
|
||||||
|
* if you need to cast the output to int16_t type.
|
||||||
|
*
|
||||||
|
* If the input value is negative, it returns 0.
|
||||||
|
*/
|
||||||
|
|
||||||
#define WEBRTC_SPL_SQRT_ITER(N) \
|
#define WEBRTC_SPL_SQRT_ITER(N) \
|
||||||
try1 = root + (1 << (N)); \
|
try1 = root + (1 << (N)); \
|
||||||
if (value >= try1 << (N)) \
|
if (value >= try1 << (N)) \
|
||||||
@ -24,30 +29,26 @@
|
|||||||
root |= 2 << (N); \
|
root |= 2 << (N); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// (out) Square root of input parameter
|
int32_t WebRtcSpl_SqrtFloor(int32_t value)
|
||||||
WebRtc_Word32 WebRtcSpl_SqrtFloor(WebRtc_Word32 value)
|
|
||||||
{
|
{
|
||||||
// new routine for performance, 4 cycles/bit in ARM
|
int32_t root = 0, try1;
|
||||||
// output precision is 16 bits
|
|
||||||
|
|
||||||
WebRtc_Word32 root = 0, try1;
|
WEBRTC_SPL_SQRT_ITER (15);
|
||||||
|
WEBRTC_SPL_SQRT_ITER (14);
|
||||||
|
WEBRTC_SPL_SQRT_ITER (13);
|
||||||
|
WEBRTC_SPL_SQRT_ITER (12);
|
||||||
|
WEBRTC_SPL_SQRT_ITER (11);
|
||||||
|
WEBRTC_SPL_SQRT_ITER (10);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 9);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 8);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 7);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 6);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 5);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 4);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 3);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 2);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 1);
|
||||||
|
WEBRTC_SPL_SQRT_ITER ( 0);
|
||||||
|
|
||||||
WEBRTC_SPL_SQRT_ITER (15);
|
return root >> 1;
|
||||||
WEBRTC_SPL_SQRT_ITER (14);
|
|
||||||
WEBRTC_SPL_SQRT_ITER (13);
|
|
||||||
WEBRTC_SPL_SQRT_ITER (12);
|
|
||||||
WEBRTC_SPL_SQRT_ITER (11);
|
|
||||||
WEBRTC_SPL_SQRT_ITER (10);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 9);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 8);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 7);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 6);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 5);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 4);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 3);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 2);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 1);
|
|
||||||
WEBRTC_SPL_SQRT_ITER ( 0);
|
|
||||||
|
|
||||||
return root >> 1;
|
|
||||||
}
|
}
|
||||||
|
88
src/common_audio/signal_processing/spl_sqrt_floor.s
Normal file
88
src/common_audio/signal_processing/spl_sqrt_floor.s
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
@ Written by Wilco Dijkstra, 1996. Refer to file LICENSE under
|
||||||
|
@ trunk/third_party_mods/sqrt_floor.
|
||||||
|
@
|
||||||
|
@ Minor modifications in code style for WebRTC, 2012.
|
||||||
|
@ Output is bit-exact with the reference C code in spl_sqrt_floor.c for non-negative inputs; negative inputs are compared as unsigned here, while the C code returns 0.
|
||||||
|
|
||||||
|
@ Input : r0 32 bit unsigned integer
|
||||||
|
@ Output: r0 = INT (SQRT (r0)), precision is 16 bits
|
||||||
|
@ Registers touched: r1, r2
|
||||||
|
|
||||||
|
.global WebRtcSpl_SqrtFloor
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
WebRtcSpl_SqrtFloor:
|
||||||
|
.fnstart
|
||||||
|
mov r1, #3 << 30
|
||||||
|
mov r2, #1 << 30
|
||||||
|
|
||||||
|
@ unroll for i = 0 .. 15
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 0
|
||||||
|
subhs r0, r0, r2, ror #2 * 0
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 1
|
||||||
|
subhs r0, r0, r2, ror #2 * 1
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 2
|
||||||
|
subhs r0, r0, r2, ror #2 * 2
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 3
|
||||||
|
subhs r0, r0, r2, ror #2 * 3
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 4
|
||||||
|
subhs r0, r0, r2, ror #2 * 4
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 5
|
||||||
|
subhs r0, r0, r2, ror #2 * 5
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 6
|
||||||
|
subhs r0, r0, r2, ror #2 * 6
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 7
|
||||||
|
subhs r0, r0, r2, ror #2 * 7
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 8
|
||||||
|
subhs r0, r0, r2, ror #2 * 8
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 9
|
||||||
|
subhs r0, r0, r2, ror #2 * 9
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 10
|
||||||
|
subhs r0, r0, r2, ror #2 * 10
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 11
|
||||||
|
subhs r0, r0, r2, ror #2 * 11
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 12
|
||||||
|
subhs r0, r0, r2, ror #2 * 12
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 13
|
||||||
|
subhs r0, r0, r2, ror #2 * 13
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 14
|
||||||
|
subhs r0, r0, r2, ror #2 * 14
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
cmp r0, r2, ror #2 * 15
|
||||||
|
subhs r0, r0, r2, ror #2 * 15
|
||||||
|
adc r2, r1, r2, lsl #1
|
||||||
|
|
||||||
|
bic r0, r2, #3 << 30 @ for rounding add: cmp r0, r2 adc r2, #1
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
26
third_party_mods/sqrt_floor/LICENSE
Normal file
26
third_party_mods/sqrt_floor/LICENSE
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
The following email record is related to source files spl_sqrt_floor.c
|
||||||
|
and spl_sqrt_floor.s in trunk/src/common_audio/signal_processing/.
|
||||||
|
|
||||||
|
|
||||||
|
From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
|
||||||
|
Date: Fri, Jun 24, 2011 at 3:20 AM
|
||||||
|
Subject: Re: sqrt routine
|
||||||
|
To: Kevin Ma <kma@google.com>
|
||||||
|
Hi Kevin,
|
||||||
|
Thanks for asking. Those routines are public domain (originally posted to
|
||||||
|
comp.sys.arm a long time ago), so you can use them freely for any purpose.
|
||||||
|
Cheers,
|
||||||
|
Wilco
|
||||||
|
|
||||||
|
----- Original Message -----
|
||||||
|
From: "Kevin Ma" <kma@google.com>
|
||||||
|
To: <Wilco.Dijkstra@ntlworld.com>
|
||||||
|
Sent: Thursday, June 23, 2011 11:44 PM
|
||||||
|
Subject: Fwd: sqrt routine
|
||||||
|
Hi Wilco,
|
||||||
|
I saw your sqrt routine from several web sites, including
|
||||||
|
http://www.finesse.demon.co.uk/steven/sqrt.html.
|
||||||
|
Just wonder if there's any copyright information with your Successive
|
||||||
|
approximation routines, or if I can freely use it for any purpose.
|
||||||
|
Thanks.
|
||||||
|
Kevin
|
Loading…
x
Reference in New Issue
Block a user