Remove legacy fft arm9e code

Either the generic FFT code or the FFT code specific to ARM-Cortex or ARM-Neon will be used, so this folder should be removed.
Review URL: http://webrtc-codereview.appspot.com/60003

git-svn-id: http://webrtc.googlecode.com/svn/trunk@225 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
leozwang@google.com 2011-07-18 16:48:17 +00:00
parent 44fe667d86
commit c93db4c4e1
18 changed files with 3 additions and 3864 deletions

View File

@ -65,7 +65,7 @@
((val) << (8 * ((index) & 0x1)))
#endif
#ifndef WEBRTC_ANDROID
#if (defined WEBRTC_ANDROID) && !(defined WEBRTC_ANDROID_ARCH_ARM)
#define WEBRTC_SPL_MUL(a, b) \
((WebRtc_Word32) ((WebRtc_Word32)(a) * (WebRtc_Word32)(b)))
#endif
@ -99,7 +99,7 @@
((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 1) \
+ (((WEBRTC_SPL_MUL_16_U16(a, (WebRtc_UWord16)(b)) >> 1) + 0x2000) >> 14))
#ifndef WEBRTC_ANDROID
#if (defined WEBRTC_ANDROID) && !(defined WEBRTC_ANDROID_ARCH_ARM)
#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
(WEBRTC_SPL_MUL_16_16(a, b >> 16) \
+ ((WEBRTC_SPL_MUL_16_16(a, (b & 0xffff) >> 1) + 0x4000) >> 15))
@ -116,7 +116,7 @@
#ifdef ARM_WINM
#define WEBRTC_SPL_MUL_16_16(a, b) \
_SmulLo_SW_SL((WebRtc_Word16)(a), (WebRtc_Word16)(b))
#elif !defined (WEBRTC_ANDROID)
#elif defined(WEBRTC_ANDROID) && !defined(WEBRTC_ANDROID_ARCH_ARM)
#define WEBRTC_SPL_MUL_16_16(a, b) \
((WebRtc_Word32) (((WebRtc_Word16)(a)) * ((WebRtc_Word16)(b))))
#endif
@ -431,14 +431,6 @@ int WebRtcSpl_DownsampleFast(WebRtc_Word16* in_vector,
// FFT operations
int WebRtcSpl_ComplexFFT(WebRtc_Word16 vector[], int stages, int mode);
int WebRtcSpl_ComplexIFFT(WebRtc_Word16 vector[], int stages, int mode);
#if (defined ARM9E_GCC) || (defined ARM_WINM) || (defined ANDROID_AECOPT)
int WebRtcSpl_ComplexFFT2(WebRtc_Word16 in_vector[],
WebRtc_Word16 out_vector[],
int stages, int mode);
int WebRtcSpl_ComplexIFFT2(WebRtc_Word16 in_vector[],
WebRtc_Word16 out_vector[],
int stages, int mode);
#endif
void WebRtcSpl_ComplexBitReverse(WebRtc_Word16 vector[], int stages);
// End: FFT operations
@ -1575,43 +1567,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// value of -1, indicating error.
//
#if (defined ARM9E_GCC) || (defined ARM_WINM) || (defined ANDROID_AECOPT)
//
// WebRtcSpl_ComplexIFFT2(...)
//
// Complex or Real inverse FFT, for ARM processor only
//
// Computes a 2^|stages|-point FFT on the input vector, which may or may not be
// in bit-reversed order. If it is bit-reversed, the original content of the
// vector could be overwritten by the output by setting the first two arguments
// the same. With X as the input complex vector, y as the output complex vector
// and with M = 2^|stages|, the following is computed:
//
// M-1
// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
// i=0
//
// The implementations are optimized for speed, not for code size. It uses the
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// Arguments:
// - in_vector : In pointer to complex vector containing 2^|stages|
// real elements interleaved with 2^|stages| imaginary
// elements. [ReImReImReIm....]
// The elements are in Q(-scale) domain.
// - out_vector : Output pointer to vector containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary
// elements. [ReImReImReIm....]
// The output is in the Q0 domain.
// - stages : Number of FFT stages. Must be at least 3 and at most
// 10.
// - mode : Dummy input.
//
// Return value : The scale parameter is always 0, except if N>1024,
// which returns a scale value of -1, indicating error.
//
#endif
//
// WebRtcSpl_ComplexFFT(...)
//
@ -1657,42 +1612,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// which returns a scale value of -1, indicating error.
//
#if (defined ARM9E_GCC) || (defined ARM_WINM) || (defined ANDROID_AECOPT)
//
// WebRtcSpl_ComplexFFT2(...)
//
// Complex or Real FFT, for ARM processor only
//
// Computes a 2^|stages|-point FFT on the input vector, which may or may not be
// in bit-reversed order. If it is bit-reversed, the original content of the
// vector could be overwritten by the output by setting the first two arguments
// the same. With x as the input complex vector, Y as the output complex vector
// and with M = 2^|stages|, the following is computed:
//
// M-1
// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
// i=0
//
// The implementations are optimized for speed, not for code size. It uses the
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// Arguments:
// - in_vector : In pointer to complex vector containing 2^|stages|
// real elements interleaved with 2^|stages| imaginary
// elements. [ReImReImReIm....]
// - out_vector : Output pointer to vector containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary
// elements. [ReImReImReIm....]
// The output is in the Q0 domain.
// - stages : Number of FFT stages. Must be at least 3 and at most
// 10.
// - mode : Dummy input
//
// Return value : The scale parameter is always 0, except if N>1024,
// which returns a scale value of -1, indicating error.
//
#endif
//
// WebRtcSpl_ComplexBitReverse(...)
//

View File

@ -21,16 +21,6 @@
#define CFFTRND 1
#define CFFTRND2 16384
#if (defined ARM9E_GCC) || (defined ARM_WINM) || (defined ANDROID_AECOPT)
// Assembler forward FFT entry point. It returns 0 on success and 1 when the
// requested number of points is larger than its coefficient table supports.
extern "C" int FFT_4OFQ14(void *src, void *dest, int NC, int shift);

// For detailed description of the fft functions, check the readme files in fft_ARM9E folder.
//
// WebRtcSpl_ComplexFFT2(...)
//
// Runs the assembler forward FFT on 2^|stages| interleaved complex points.
//
// Arguments:
//      - frfi      : Input vector, [ReImReIm...].
//      - frfiOut   : Output vector, [ReImReIm...].
//      - stages    : Number of FFT stages; the transform size is 1 << stages.
//      - mode      : Dummy input, not used.
//
// Return value     : 0 on success, -1 if the assembler routine rejects the
//                    requested size.
int WebRtcSpl_ComplexFFT2(WebRtc_Word16 frfi[], WebRtc_Word16 frfiOut[], int stages, int mode)
{
    (void) mode;

    // The assembler routine reports failure as 1, whereas the documented
    // contract of this function reports failure as -1, so translate the
    // status instead of forwarding it unchanged.
    if (FFT_4OFQ14(frfi, frfiOut, 1 << stages, 0) != 0)
    {
        return -1;
    }
    return 0;
}
#endif
int WebRtcSpl_ComplexFFT(WebRtc_Word16 frfi[], int stages, int mode)
{
int i, j, l, k, istep, n, m;

View File

@ -20,17 +20,6 @@
#define CIFFTSFT 14
#define CIFFTRND 1
#if (defined ARM9E_GCC) || (defined ARM_WINM) || (defined ANDROID_AECOPT)
// Assembler inverse FFT entry point. It returns 0 on success and 1 when the
// requested number of points is larger than its coefficient table supports.
extern "C" int FFT_4OIQ14(void *src, void *dest, int NC, int shift);

// For detailed description of the fft functions, check the readme files in fft_ARM9E folder.
//
// WebRtcSpl_ComplexIFFT2(...)
//
// Runs the assembler inverse FFT on 2^|stages| interleaved complex points.
//
// Arguments:
//      - frfi      : Input vector, [ReImReIm...].
//      - frfiOut   : Output vector, [ReImReIm...].
//      - stages    : Number of FFT stages; the transform size is 1 << stages.
//      - mode      : Dummy input, not used.
//
// Return value     : 0 (the scale) on success, -1 if the assembler routine
//                    rejects the requested size.
int WebRtcSpl_ComplexIFFT2(WebRtc_Word16 frfi[], WebRtc_Word16 frfiOut[], int stages, int mode)
{
    (void) mode;

    // Do not discard the assembler status: when the transform is rejected
    // the output buffer has not been produced, so report the documented
    // error value instead of claiming success.
    if (FFT_4OIQ14(frfi, frfiOut, 1 << stages, 0) != 0)
    {
        return -1;
    }
    return 0;
}
#endif
int WebRtcSpl_ComplexIFFT(WebRtc_Word16 frfi[], int stages, int mode)
{
int i, j, l, k, istep, n, m, scale, shift;

View File

@ -1,51 +0,0 @@
;// Optimised ARM assembler multi-radix FFT
;
; Defines the GENERATE_FFT_FUNCTION macro and expands it once, in the FFTCODE
; area, for the 16-bit / radix-4 with radix-8 first stage / forward options.
INCLUDE fft_main_forward.h
MACRO
GENERATE_FFT_FUNCTION $flags
; first work out a readable function name
; based on the flags
FFT_OPTIONS_STRING $flags, name
; Entry:
; r0 = input array
; r1 = output array
; r2 = number of points in FFT
; r3 = pre-scale shift
;
; Exit:
; r0 = 0 if successful
; = 1 if table too small
;
EXPORT FFT_$name
; NOTE(review): the entry label is hard-coded while the exported symbol is
; built from $name, so the two only agree when $name expands to "4OFQ14".
FFT_4OFQ14
; Preserve r4-r11 and the return address (r14) across the transform.
STMFD sp!, {r4-r11, r14}
; Build the coefficient table name: <qname><coeforder>_8 when the radix string
; is "4O", otherwise <qname><coeforder>_4.
IF "$radix"="4O"
tablename SETS "_8"
tablename SETS "$qname$coeforder$tablename"
ELSE
tablename SETS "_4"
tablename SETS "$qname$coeforder$tablename"
ENDIF
IMPORT s_$tablename
; Load the first word stored at s_<tablename> and compare N against it.
; If N is greater, return early with r0 = 1 ("table too small").
; Presumably that word is the largest supported FFT size - TODO confirm
; against the table definition.
LDR lr, =s_$tablename
LDR lr,[lr]
CMP N, lr
MOVGT r0, #1
LDMGTFD sp!, {r4-r11, pc}
; Expand the transform body for the selected options.
GENERATE_FFT $flags
; Success: return 0 and restore the saved registers.
MOV r0, #0
LDMFD sp!, {r4-r11, pc}
; Emit the literal pool used by the "LDR lr, =..." above.
LTORG
MEND
AREA FFTCODE, CODE, READONLY
GENERATE_FFT_FUNCTION FFT_16bit +FFT_RADIX4_8F +FFT_FORWARD ; +FFT_REVERSED
END

View File

@ -1,51 +0,0 @@
;// Optimised ARM assembler multi-radix FFT
;
; Defines the GENERATE_IFFT_FUNCTION macro and expands it once, in the FFTCODE
; area, for the 16-bit / radix-4 with radix-8 first stage / inverse /
; no-normalisation options.
INCLUDE fft_main_inverse.h
MACRO
GENERATE_IFFT_FUNCTION $flags
; first work out a readable function name
; based on the flags
FFT_OPTIONS_STRING $flags, name
; Entry:
; r0 = input array
; r1 = output array
; r2 = number of points in FFT
; r3 = pre-scale shift
;
; Exit:
; r0 = 0 if successful
; = 1 if table too small
;
EXPORT FFT_$name
; NOTE(review): the entry label is hard-coded while the exported symbol is
; built from $name, so the two only agree when $name expands to "4OIQ14".
FFT_4OIQ14
; Preserve r4-r11 and the return address (r14) across the transform.
STMFD sp!, {r4-r11, r14}
; Build the coefficient table name: <qname><coeforder>_8 when the radix string
; is "4O", otherwise <qname><coeforder>_4.
IF "$radix"="4O"
tablename SETS "_8"
tablename SETS "$qname$coeforder$tablename"
ELSE
tablename SETS "_4"
tablename SETS "$qname$coeforder$tablename"
ENDIF
IMPORT s_$tablename
; Load the first word stored at s_<tablename> and compare N against it.
; If N is greater, return early with r0 = 1 ("table too small").
; Presumably that word is the largest supported FFT size - TODO confirm
; against the table definition.
LDR lr, =s_$tablename
LDR lr,[lr]
CMP N, lr
MOVGT r0, #1
LDMGTFD sp!, {r4-r11, pc}
; Expand the transform body for the selected options.
GENERATE_FFT $flags
; Success: return 0 and restore the saved registers.
MOV r0, #0
LDMFD sp!, {r4-r11, pc}
; Emit the literal pool used by the "LDR lr, =..." above.
LTORG
MEND
AREA FFTCODE, CODE, READONLY
GENERATE_IFFT_FUNCTION FFT_16bit +FFT_RADIX4_8F +FFT_INVERSE +FFT_NONORM ; +FFT_REVERSED
END

View File

@ -1,774 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fft_mac.h,v
; Revision: 1.14
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; Shared macros and interface definition file.
; NB: All the algorithms in this code are Decimation in Time. ARM
; is much better at Decimation in Time (as opposed to Decimation
; in Frequency) due to the position of the barrel shifter. Decimation
; in time has the twiddling at the start of the butterfly, whereas
; decimation in frequency has it at the end of the butterfly. The
; post multiply shifts can be hidden for Decimation in Time.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; FIRST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The FIRST STAGE macros "FS_RAD<R>" have the following interface:
;
; ON ENTRY:
; REGISTERS:
; r0 = inptr => points to the input buffer consisting of N complex
; numbers of size (1<<datainlog) bytes each
; r1 = dptr => points to the output buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = N => is the number of points in the transform
; r3 = pscale => shift to prescale input by (if applicable)
; ASSEMBLER VARIABLES:
; reversed => logical variable, true if input data is already bit reversed
; The data needs to be bit reversed otherwise
;
; ACTION:
; The routine should
; (1) Bit reverse the data as required for the whole FFT (unless
; the reversed flag is set)
; (2) Prescale the input data by pscale (if applicable)
; (3) Perform a radix R first stage on the data
; (4) Place the processed data in the output array pointed to by dptr
;
; ON EXIT:
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage
; this is: R<<datalog
; r3 = count => number of radix-R blocks or groups processed in this stage
; this is: N/R
; r0,r4-r12,r14 corrupted
; Register aliases for the FIRST STAGE interface.
; dptr is declared twice, both times as r1. N and dinc both name r2
; (N on entry, dinc on exit).
inptr RN 0 ; input buffer
dptr RN 1 ; output/scratch buffer
N RN 2 ; size of the FFT
dptr RN 1 ; data pointer - points to end (load in reverse order)
dinc RN 2 ; bytes between data elements at this level of FFT
count RN 3 ; (elements per block<<16) | (blocks per stage)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; GENERAL STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The GENERAL STAGE macros "GS_RAD<R>" have the following interface.
;
; To describe the arguments, suppose this routine is called as stage j
; in a k-stage FFT with N=R1*R2*...*Rk. This stage is radix R=Rj.
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage consisting
; of complex numbers of size (1<<coeflog) bytes each in some
; stage dependent format.
; The format currently used is described in full in the
; ReadMe file in the tables subdirectory.
; r1 = dptr => points to the working buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => number of bytes per "block" or "Group" in the last stage:
; dinc = (R1*R2*...*R(j-1))<<datalog
; r3 = count => number of blocks or Groups in the last stage:
; count = Rj*R(j+1)*...*Rk
; NB dinc*count = N<<datalog
;
; ACTION:
; The routine should
; (1) Twiddle the input data
; (2) Perform a radix R stage on the data
; (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
; r0 = cptr => Updated to the end of the coefficients for the stage
; (the coefficients for the next stage will usually follow)
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage:
; dinc = (R1*R2*..*Rj)<<datalog = (input dinc)*R
; r3 = count => number of radix-R blocks or groups processed in this stage
; count = R(j+1)*...*Rk = (input count)/R
; r0,r4-r12,r14 corrupted
; Register aliases for the GENERAL STAGE interface (r0-r3).
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; bytes per block/group at this stage
count RN 3 ; number of blocks/groups at this stage
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; LAST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The LAST STAGE macros "LS_RAD<R>" have the following interface.
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage consisting
; of complex numbers of size (1<<coeflog) bytes each in some
; stage dependent format.
; The format currently used is described in full in the
; ReadMe file in the tables subdirectory.
; There is a possible stride between the coefficients
; specified by cinc
; r1 = dptr => points to the working buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => number of bytes per "block" or "Group" in the last stage:
; dinc = (N/R)<<datalog
; r3 = cinc => Bytes between twiddle values in the array pointed to by cptr
;
; ACTION:
; The routine should
; (1) Twiddle the input data
; (2) Perform a (last stage optimised) radix R stage on the data
; (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
; r0 = cptr => Updated to point to real-to-complex conversion coefficients
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage:
; dinc = N<<datalog = (input dinc)*R
; r0,r4-r12,r14 corrupted
; Register aliases for the LAST STAGE interface (r0-r3).
; r3 is named cinc here, where the general stage names it count.
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; bytes per block/group at this stage
cinc RN 3 ; stride between twiddle coefficients in bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; COMPLEX TO REAL CONVERSION INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The COMPLEX TO REAL macros "LS_ZTOR" have the following interface.
;
; Suppose that 'w' is the N'th root of unity being used for the real FFT
; (usually exp(-2*pi*i/N) for forward transforms and exp(+2*pi*i/N) for
; the inverse transform).
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage
; This consists of (1,w,w^2,w^3,...,w^(N/4-1)).
; There is a stride between each coefficient specified by cinc
; r1 = dptr => points to the working buffer consisting of N/2 complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => (N/2)<<datalog, the size of the complex buffer in bytes
; r3 = cinc => Bytes between twiddle value in array pointed to by cptr
; r4 = dout => Output buffer (usually the same as dptr)
;
; ACTION:
; The routine should take the output of an N/2 point complex FFT and convert
; it to the output of an N point real FFT, assuming that the real input
; inputs were packed up into the real,imag,real,imag,... buffers of the complex
; input. The output is N/2 complex numbers of the form:
; y[0]+i*y[N/2], y[1], y[2], ..., y[N/2-1]
; where y[0],...,y[N-1] is the output from a complex transform of the N
; real inputs.
;
; ON EXIT:
; r0-r12,r14 corrupted
; Register aliases for the COMPLEX TO REAL conversion interface (r0-r4).
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; (N/2)<<datalog, the size of the data in bytes
cinc RN 3 ; bytes between twiddle values in the coefficient buffer
dout RN 4 ; address to write the output (normally the same as dptr)
;;;;;;;;;;;;;;;;;;;;;; END OF INTERFACES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Working register aliases used by the stage macros.
; The commented-out aliases below repeat names that are already declared in
; the interface sections of this file; they are kept for reference only.
; first stage/outer loop level
;inptr RN 0
;dptr RN 1
;N RN 2 ; size of FFT
;dinc RN 2 ; bytes between block size when bit reversed (scaling of N)
bitrev RN 3
; inner loop level
;cptr RN 0 ; coefficient pointer for this level
;dptr RN 1 ; data pointer - points to end (load in reverse order)
;dinc RN 2 ; bytes between data elements at this level of FFT
;count RN 3 ; (elements per block<<16) | (blocks per stage)
; data registers: four complex values, real/imaginary pairs in r4-r11
x0r RN 4
x0i RN 5
x1r RN 6
x1i RN 7
x2r RN 8
x2i RN 9
x3r RN 10
x3i RN 11
; scratch registers
t0 RN 12 ; these MUST be in correct order (t0<t1) for STM's
t1 RN 14
; Declare the global string variables <prefix>r and <prefix>i and set them
; to the text of $v0 and $v1 respectively.
MACRO
SETREG $prefix,$v0,$v1
GBLS $prefix.r
GBLS $prefix.i
$prefix.r SETS "$v0"
$prefix.i SETS "$v1"
MEND
; Apply SETREG to four consecutive pairs: <prefix>0 gets ($v0,$v1),
; <prefix>1 gets ($v2,$v3), <prefix>2 gets ($v4,$v5), <prefix>3 gets ($v6,$v7).
MACRO
SETREGS $prefix,$v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
SETREG $prefix.0,$v0,$v1
SETREG $prefix.1,$v2,$v3
SETREG $prefix.2,$v4,$v5
SETREG $prefix.3,$v6,$v7
MEND
; Two-pair version of SETREGS: <prefix>0 gets ($v0,$v1), <prefix>1 gets ($v2,$v3).
MACRO
SET2REGS $prefix,$v0,$v1,$v2,$v3
SETREG $prefix.0,$v0,$v1
SETREG $prefix.1,$v2,$v3
MEND
; Macro to load twiddle coefficients
; Customise according to coefficient format
; Loads the next 3 complex coefficients into the given registers and
; updates the coefficient pointer $cp past them.
; $coefformat selects the layout: "W" = one word per scalar,
; "H" = one halfword per scalar (loaded with sign extension).
; Any other value raises an assembly-time ERROR.
; NOTE(review): the "W" path uses a single LDMIA, so the registers are
; presumably expected in ascending order - confirm against callers.
MACRO
LOADCOEFS $cp, $c0r, $c0i, $c1r, $c1i, $c2r, $c2i
IF "$coefformat"="W"
; one word per scalar
LDMIA $cp!, {$c0r, $c0i, $c1r, $c1i, $c2r, $c2i}
MEXIT
ENDIF
IF "$coefformat"="H"
; one half word per scalar
LDRSH $c0r, [$cp], #2
LDRSH $c0i, [$cp], #2
LDRSH $c1r, [$cp], #2
LDRSH $c1i, [$cp], #2
LDRSH $c2r, [$cp], #2
LDRSH $c2i, [$cp], #2
MEXIT
ENDIF
ERROR "Unsupported coeficient format: $coefformat"
MEND
; Macro to load one twiddle coefficient
; $cp = address to load complex data
; $ci = post index to make to address after load
; $re = register receiving the real part (at offset 0)
; $im = register receiving the imaginary part (offset 4 for "W", 2 for "H")
; The imaginary part is loaded first so that the post-indexed load of the
; real part can advance $cp by $ci.
MACRO
LOADCOEF $cp, $ci, $re, $im
IF "$coefformat"="W"
LDR $im, [$cp, #4]
LDR $re, [$cp], $ci
MEXIT
ENDIF
IF "$coefformat"="H"
LDRSH $im, [$cp, #2]
LDRSH $re, [$cp], $ci
MEXIT
ENDIF
ERROR "Unsupported coeficient format: $coefformat"
MEND
; Macro to load one component of one twiddle coefficient
; $cp = address to load the value from (the pointer is not updated)
; $re = register receiving the value stored at offset 0
MACRO
LOADCOEFR $cp, $re
IF "$coefformat"="W"
LDR $re, [$cp]
MEXIT
ENDIF
IF "$coefformat"="H"
LDRSH $re, [$cp]
MEXIT
ENDIF
ERROR "Unsupported coeficient format: $coefformat"
MEND
; Macro to load data elements in the given format
; $dp = address to load complex data
; $di = post index to make to address after load
; $re = register receiving the real part (at offset 0)
; $im = register receiving the imaginary part (offset 4 for "W", 2 for "H")
; $format = "W" (word per scalar) or "H" (sign-extended halfword per scalar)
; Any other format raises an assembly-time ERROR.
MACRO
LOADDATAF $dp, $di, $re, $im, $format
IF "$format"="W"
LDR $im, [$dp, #4]
LDR $re, [$dp], $di
MEXIT
ENDIF
IF "$format"="H"
LDRSH $im, [$dp, #2]
LDRSH $re, [$dp], $di
MEXIT
ENDIF
ERROR "Unsupported load format: $format"
MEND
; Load one complex element in the input-array format from $dp without
; updating $dp.
; $dp = address to load complex data
; $re = register receiving the real part (at offset 0)
; $im = register receiving the imaginary part
; $datainformat selects the layout: "W" = word per scalar,
; "H" = sign-extended halfword per scalar.
MACRO
LOADDATAZ $dp, $re, $im
IF "$datainformat"="W"
LDMIA $dp, {$re,$im}
MEXIT
ENDIF
IF "$datainformat"="H"
LDRSH $im, [$dp, #2]
LDRSH $re, [$dp]
MEXIT
ENDIF
; Report the variable that was actually tested; this macro has no $format
; parameter, so the previous message could not show the offending value.
ERROR "Unsupported load format: $datainformat"
MEND
; Load a complex data element from the working array
; Forwards to LOADDATAF using the working-array format $dataformat.
MACRO
LOADDATA $dp, $di, $re, $im
LOADDATAF $dp, $di, $re, $im, $dataformat
MEND
; Load a complex data element from the input array
; Forwards to LOADDATAF using the input-array format $datainformat.
MACRO
LOADDATAI $dp, $di, $re, $im
LOADDATAF $dp, $di, $re, $im, $datainformat
MEND
; Load four consecutive complex elements from the input array and advance $dp.
; For the "W" input format a single LDMIA with writeback is used; otherwise
; four LOADDATAI calls each post-increment $dp by 1<<datalog bytes.
; NOTE(review): the increment uses datalog rather than datainlog; the two are
; set to the same value for both data sizes in FFT_OPTIONS_STRING - confirm
; this is intended should they ever differ.
MACRO
LOADDATA4 $dp, $re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3
IF "$datainformat"="W"
LDMIA $dp!, {$re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3}
ELSE
LOADDATAI $dp, #1<<$datalog, $re0,$im0
LOADDATAI $dp, #1<<$datalog, $re1,$im1
LOADDATAI $dp, #1<<$datalog, $re2,$im2
LOADDATAI $dp, #1<<$datalog, $re3,$im3
ENDIF
MEND
; Shift data after load
; When $postldshift is non-empty it is appended as the shift operand of a MOV
; applied in place to $dr, and to $di as well when $di is supplied.
; Emits nothing when $postldshift is empty.
MACRO
SHIFTDATA $dr, $di
IF "$postldshift"<>""
IF "$di"<>""
MOV $di, $di $postldshift
ENDIF
MOV $dr, $dr $postldshift
ENDIF
MEND
; Store a complex data item in the output data buffer
; $dp = address to store to
; $di = post index applied to $dp after the store
; $re = real part, stored at offset 0
; $im = imaginary part, stored at offset 4 ("W") or 2 ("H")
; The layout is selected by $dataformat; other values raise an ERROR.
MACRO
STORE $dp, $di, $re, $im
IF "$dataformat"="W"
STR $im, [$dp, #4]
STR $re, [$dp], $di
MEXIT
ENDIF
IF "$dataformat"="H"
STRH $im, [$dp, #2]
STRH $re, [$dp], $di
MEXIT
ENDIF
ERROR "Unsupported save format: $dataformat"
MEND
; Store a complex data item in the output data buffer and advance $dp to the
; next element ("W": STMIA with writeback, "H": post-increment by 4 bytes).
; $re = real part, $im = imaginary part.
MACRO
STOREP $dp, $re, $im
IF "$dataformat"="W"
STMIA $dp!, {$re,$im}
MEXIT
ENDIF
IF "$dataformat"="H"
STRH $im, [$dp, #2]
STRH $re, [$dp], #4
MEXIT
ENDIF
ERROR "Unsupported save format: $dataformat"
MEND
; Store three consecutive complex data items in the output data buffer and
; advance $dp past them ("W": one STMIA with writeback, "H": three halfword
; pairs, each followed by a 4-byte post-increment).
MACRO
STORE3P $dp, $re0, $im0, $re1, $im1, $re2, $im2
IF "$dataformat"="W"
STMIA $dp!, {$re0,$im0, $re1,$im1, $re2,$im2}
MEXIT
ENDIF
IF "$dataformat"="H"
STRH $im0, [$dp, #2]
STRH $re0, [$dp], #4
STRH $im1, [$dp, #2]
STRH $re1, [$dp], #4
STRH $im2, [$dp, #2]
STRH $re2, [$dp], #4
MEXIT
ENDIF
ERROR "Unsupported save format: $dataformat"
MEND
; do different command depending on forward/inverse FFT
; $for = opcode for the forward transform, $bac = opcode for the inverse.
; This copy of the macro always emits $for; $bac is accepted but unused.
; $shift is an optional extra operand appended to the instruction when given.
MACRO
DOi $for, $bac, $d, $s1, $s2, $shift
IF "$shift"=""
$for $d, $s1, $s2
ELSE
$for $d, $s1, $s2, $shift
ENDIF
MEND
; d = s1 + s2 if w=exp(+2*pi*i/N) j=+i - inverse transform
; d = s1 - s2 if w=exp(-2*pi*i/N) j=-i - forward transform
; Passes SUB as the forward opcode and ADD as the inverse opcode to DOi.
MACRO
ADDi $d, $s1, $s2, $shift
DOi SUB, ADD, $d, $s1, $s2, $shift
MEND
; d = s1 - s2 if w=exp(+2*pi*i/N) j=+i - inverse transform
; d = s1 + s2 if w=exp(-2*pi*i/N) j=-i - forward transform
; Passes ADD as the forward opcode and SUB as the inverse opcode to DOi.
MACRO
SUBi $d, $s1, $s2, $shift
DOi ADD, SUB, $d, $s1, $s2, $shift
MEND
; check that $val is in the range -$max to +$max-1
; set carry flag (sticky) if not (2 cycles)
; has the advantage of not needing a separate register
; to store the overflow state
; $tmp is overwritten with $val EOR ($val ASR 31). The compare only executes
; while carry is clear, so an earlier overflow indication is not cleared.
MACRO
CHECKOV $val, $tmp, $max
EOR $tmp, $val, $val, ASR#31
CMPCC $tmp, $max
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Macro's to perform the twiddle stage (complex multiply by coefficient)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The coefficients are stored in different formats according to the
; precision and processor architecture. The coefficients required
; will be of the form:
;
; c(k) = cos( + k*2*pi*i/N ), s(k) = sin( + k*2*pi*i/N )
;
; c(k) + i*s(k) = exp(+2*pi*k*i/N)
;
; for some k's. The storage formats are:
;
; Format Data
; Q14S (c-s, s) in Q14 format, 16-bits per real
; Q14R (c, s) in Q14 format, 16-bits per real
; Q30S (c-s, s) in Q30 format, 32-bits per real
;
; The operation to be performed is one of:
;
; a+i*b = (x+i*y)*(c-i*s) => forward transform
; OR a+i*b = (x+i*y)*(c+i*s) => inverse transform
;
; For the R format the operation is quite simple - requiring 4 muls
; and 2 adds:
;
; Forward: a = x*c+y*s, b = y*c-x*s
; Inverse: a = x*c-y*s, b = y*c+x*s
;
; For the S format the operations is more complex but only requires
; three multiplies, and is simpler to schedule:
;
; Forward: a = (y-x)*s + x*(c+s) = x*(c-s) + (x+y)*s
; b = (y-x)*s + y*(c-s) = y*(c+s) - (x+y)*s
;
; Inverse: a = (x-y)*s + x*(c-s)
; b = (x-y)*s + y*(c+s)
;
; S advantage 16bit: 1ADD, 1SUB, 1MUL, 2MLA instead of 1SUB, 3MUL, 1MLA
; S advantage 32bit: 2ADD, 1SUB, 2SMULL, 1SMLAL instead of 1RSB, 2SMULL, 2SMLAL
; So S wins except for a very fast multiplier (eg 9E)
;
; NB The coefficients must always be the second operand on processor that
; take a variable number of cycles per multiply - so the FFT time remains constant
; This twiddle takes unpacked real and imaginary values
; Expects (cr,ci) = (c-s,s) on input
; Sets (cr,ci) = (a,b) on output
; $xr,$xi = data value (x,y); $t0,$t1 = scratch registers (overwritten).
; When 0 <= qshift < 32 the MUL/MLA sequence is used; otherwise the
; SMULL/SMLAL sequence is used, which also overwrites $xi with y-x.
MACRO
TWIDDLE $xr, $xi, $cr, $ci, $t0, $t1
IF qshift>=0 :LAND: qshift<32
SUB $t1, $xi, $xr ; y-x
MUL $t0, $t1, $ci ; (y-x)*s
ADD $t1, $cr, $ci, LSL #1 ; t1 = c+s allow mul to finish on SA
MLA $ci, $xi, $cr, $t0 ; b
MLA $cr, $xr, $t1, $t0 ; a
ELSE
ADD $t1, $cr, $ci, LSL #1 ; t1 = c+s
SMULL $cr, $t0, $xi, $cr ; t0 = y*(c-s)
SUB $xi, $xi, $xr ; xi = y-x + allow mul to finish on SA
SMULL $ci, $cr, $xi, $ci ; cr = (y-x)*s
ADD $ci, $cr, $t0 ; b + allow mul to finish on SA
SMLAL $t0, $cr, $xr, $t1 ; a
ENDIF
MEND
; The following twiddle variant is similar to the above
; except that it is for an "E" processor variant. A standard
; 4 multiply twiddle is used as it requires the same number
; of cycles and needs less intermediate precision
;
; $c0 = coefficient real and imaginary (c,s) (packed: c in the bottom
; halfword, s in the top halfword)
; $xx = input data real and imaginary part (packed: real in the bottom
; halfword, imaginary in the top halfword)
; $xxi = optional; when given, the imaginary input is taken from the bottom
; halfword of $xxi instead of the top halfword of $xx
; $t0 = scratch register (overwritten)
;
; $xr = destination register for real part of product
; $xi = destination register for imaginary part of product
;
; All registers should be distinct
;
MACRO
TWIDDLE_E $xr, $xi, $c0, $t0, $xx, $xxi
SMULBT $t0, $xx, $c0
SMULBB $xr, $xx, $c0
IF "$xxi"=""
SMULTB $xi, $xx, $c0
SMLATT $xr, $xx, $c0, $xr
ELSE
SMULBB $xi, $xxi, $c0
SMLABT $xr, $xxi, $c0, $xr
ENDIF
SUB $xi, $xi, $t0
MEND
; Scale data value in by the coefficient, writing result to out
; The coefficient must be the second multiplicand
; The post mul shift need not be done so in most cases this
; is just a multiply (unless you need higher precision)
; coef must be preserved
; When 0 <= qshift < 32 a plain MUL is used; otherwise SMULL is used, $out
; receives the high word of the product and $tmp receives the low word.
MACRO
SCALE $out, $in, $coef, $tmp
IF qshift>=0 :LAND: qshift<32
MUL $out, $in, $coef
ELSE
SMULL $tmp, $out, $in, $coef
ENDIF
MEND
; Declare the global strings <out>log and <out>format for a data format code.
; <out>format is set to $format; <out>log is set to "1" for "B", "2" for "H"
; and "3" for "W". Any other format raises an assembly-time ERROR.
MACRO
DECODEFORMAT $out, $format
GBLS $out.log
GBLS $out.format
$out.format SETS "$format"
IF "$format"="B"
$out.log SETS "1"
MEXIT
ENDIF
IF "$format"="H"
$out.log SETS "2"
MEXIT
ENDIF
IF "$format"="W"
$out.log SETS "3"
MEXIT
ENDIF
ERROR "Unrecognised format for $out: $format"
MEND
; generate a string in $var of the correct right shift
; amount - negative values = left shift
; $var is first cleared, then set to ",ASR #0x<value>" when 0 < value < 32,
; or to ",LSL #0x<-value>" when 0 < -value < 32. It stays empty otherwise
; (including value = 0).
MACRO
SETSHIFT $var, $value
LCLA svalue
svalue SETA $value
$var SETS ""
IF svalue>0 :LAND: svalue<32
$var SETS ",ASR #0x$svalue"
ENDIF
svalue SETA -svalue
IF svalue>0 :LAND: svalue<32
$var SETS ",LSL #0x$svalue"
ENDIF
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; CODE to decipher the FFT options ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The $flags variable specifies the FFT options
; The global string $name is set to a textual version, built as
; <radix><direction><qname><intype><outpos><prescale>
; The global string tablename is declared here but is not assigned by this
; macro.
; The macro also declares and sets the global option variables listed below.
MACRO
FFT_OPTIONS_STRING $flags, $name
GBLS $name
GBLS qname ; name of the precision (eg Q14, Q30)
GBLS direction ; name of the direction (eg I, F)
GBLS radix ; name of the radix (2, 4E, 4B, 4O etc)
GBLS intype ; name of input data type (if real)
GBLS prescale ; flag to indicate prescale
GBLS outpos ; position for the output data
GBLS datainformat ; bytes per input data item
GBLS dataformat ; bytes per working item
GBLS coefformat ; bytes per coefficient working item
GBLS coeforder ; R=(c,s) S=(c-s,s) storage format
GBLA datainlog ; shift to bytes per input complex
GBLA datalog ; shift to bytes per working complex
GBLA coeflog ; shift to bytes per coefficient complex
GBLA qshift ; right shift after multiply
GBLA norm
GBLA architecture ; 4=Arch4(7TDMI,SA), 5=Arch5TE(ARM9E)
GBLS cdshift
GBLS postmulshift
GBLS postldshift
GBLS postmulshift1
GBLS postldshift1
GBLL reversed ; flag to indicate input is already bit reversed
GBLS tablename
; find what sort of processor we are building the FFT for
architecture SETA 4 ; Architecture 4 (7TDMI, StrongARM etc)
;qname SETS {CPU}
; P $qname
; Select architecture 5 when {ARCHITECTURE} starts with "5TE" or with "6".
IF ((({ARCHITECTURE}:CC:"aaaa"):LEFT:3="5TE") :LOR: (({ARCHITECTURE}:CC:"aa"):LEFT:1="6"))
architecture SETA 5 ; Architecture 5 (ARM9E, E extensions)
; P arch E
ENDIF
reversed SETL {FALSE}
; decode input order
IF ($flags:AND:FFT_INPUTORDER)=FFT_REVERSED
reversed SETL {TRUE}
ENDIF
; decode radix type to $radix
IF ($flags:AND:FFT_RADIX)=FFT_RADIX4
radix SETS "4E"
ENDIF
IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_8F
radix SETS "4O"
ENDIF
IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_2L
radix SETS "4B"
ENDIF
; decode direction to $direction
; NOTE(review): the FFT_DIRECTION bit of $flags is not inspected here;
; direction is always "F" in this copy of the file.
direction SETS "F"
; decode data size to $qname, and *log's
IF ($flags:AND:FFT_DATA_SIZES)=FFT_32bit
qname SETS "Q30"
datainlog SETA 3 ; 8 bytes per complex
datalog SETA 3
coeflog SETA 3
datainformat SETS "W"
dataformat SETS "W"
coefformat SETS "W"
qshift SETA -2 ; shift left top word of 32 bit result
ENDIF
IF ($flags:AND:FFT_DATA_SIZES)=FFT_16bit
qname SETS "Q14"
datainlog SETA 2
datalog SETA 2
coeflog SETA 2
datainformat SETS "H"
dataformat SETS "H"
coefformat SETS "H"
qshift SETA 14
ENDIF
; find the coefficient ordering
; "R" is used on architecture 5 and above when qshift < 16, "S" otherwise
coeforder SETS "S"
IF (architecture>=5):LAND:(qshift<16)
coeforder SETS "R"
ENDIF
; decode real vs complex input data type
intype SETS ""
IF ($flags:AND:FFT_INPUTTYPE)=FFT_REAL
intype SETS "R"
ENDIF
; decode on outpos
outpos SETS ""
IF ($flags:AND:FFT_OUTPUTPOS)=FFT_OUT_INBUF
outpos SETS "I"
ENDIF
; decode on prescale
prescale SETS ""
IF ($flags:AND:FFT_INPUTSCALE)=FFT_PRESCALE
prescale SETS "P"
ENDIF
; decode on output scale
norm SETA 1
IF ($flags:AND:FFT_OUTPUTSCALE)=FFT_NONORM
norm SETA 0
ENDIF
; calculate shift to convert data offsets to coefficient offsets
SETSHIFT cdshift, ($datalog)-($coeflog)
$name SETS "$radix$direction$qname$intype$outpos$prescale"
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; FFT GENERATOR ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; FFT options bitfield
; Each group starts with a field mask, followed by the values that field can
; take. Options are combined by adding one value from each group.
FFT_DIRECTION EQU 0x00000001 ; direction select bit
FFT_FORWARD EQU 0x00000000 ; forward exp(-ijkw) coefficient FFT
FFT_INVERSE EQU 0x00000001 ; inverse exp(+ijkw) coefficient FFT
FFT_INPUTORDER EQU 0x00000002 ; input order select field
FFT_BITREV EQU 0x00000000 ; input data is in normal order (bit reverse)
FFT_REVERSED EQU 0x00000002 ; assume input data is already bit reversed
FFT_INPUTSCALE EQU 0x00000004 ; select scale on input data
FFT_NOPRESCALE EQU 0x00000000 ; do not scale input data
FFT_PRESCALE EQU 0x00000004 ; scale input data up by a register amount
FFT_INPUTTYPE EQU 0x00000010 ; selector for real/complex input data
FFT_COMPLEX EQU 0x00000000 ; do complex FFT of N points
FFT_REAL EQU 0x00000010 ; do a 2*N point real FFT
FFT_OUTPUTPOS EQU 0x00000020 ; where is the output placed?
FFT_OUT_OUTBUF EQU 0x00000000 ; default - in the output buffer
FFT_OUT_INBUF EQU 0x00000020 ; copy it back to the input buffer
FFT_RADIX EQU 0x00000F00 ; radix select
FFT_RADIX4 EQU 0x00000000 ; radix 4 (log_2 N must be even)
FFT_RADIX4_8F EQU 0x00000100 ; radix 4 with radix 8 first stage
FFT_RADIX4_2L EQU 0x00000200 ; radix 4 with optional radix 2 last stage
FFT_OUTPUTSCALE EQU 0x00001000 ; select output scale value
FFT_NORMALISE EQU 0x00000000 ; default - divide by N during algorithm
FFT_NONORM EQU 0x00001000 ; calculate the raw sum (no scale)
FFT_DATA_SIZES EQU 0x000F0000 ; data size select field
FFT_16bit EQU 0x00000000 ; 16-bit data and Q14 coefs
FFT_32bit EQU 0x00010000 ; 32-bit data and Q30 coefs
END

View File

@ -1,774 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fft_mac.h,v
; Revision: 1.14
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; Shared macros and interface definition file.
; NB: All the algorithms in this code are Decimation in Time. ARM
; is much better at Decimation in Time (as opposed to Decimation
; in Frequency) due to the position of the barrel shifter. Decimation
; in time has the twiddling at the start of the butterfly, whereas
; decimation in frequency has it at the end of the butterfly. The
; post multiply shifts can be hidden for Decimation in Time.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; FIRST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The FIRST STAGE macros "FS_RAD<R>" have the following interface:
;
; ON ENTRY:
; REGISTERS:
; r0 = inptr => points to the input buffer consisting of N complex
; numbers of size (1<<datainlog) bytes each
; r1 = dptr => points to the output buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = N => is the number of points in the transform
; r3 = pscale => shift to prescale input by (if applicable)
; ASSEMBLER VARIABLES:
; reversed => logical variable, true if input data is already bit reversed
; The data needs to be bit reversed otherwise
;
; ACTION:
; The routine should
; (1) Bit reverse the data as required for the whole FFT (unless
; the reversed flag is set)
; (2) Prescale the input data by pscale (if applicable)
; (3) Perform a radix R first stage on the data
; (4) Place the processed data in the output array pointed to by dptr
;
; ON EXIT:
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage
; this is: R<<datalog
; r3 = count => number of radix-R blocks or groups processed in this stage
; this is: N/R
; r0,r4-r12,r14 corrupted
; Register aliases for the first stage interface. r1 and r2 are named
; twice because the interface above gives them one meaning on entry and
; another on exit.
; -- on entry --
inptr RN 0 ; input buffer
dptr RN 1 ; output/scratch buffer
N RN 2 ; size of the FFT
; -- on exit --
; NOTE(review): the three descriptions below do not match the ON EXIT
; text above (dptr preserved, dinc = R<<datalog, count = N/R) - confirm
; which is current.
dptr RN 1 ; data pointer - points to end (load in reverse order)
dinc RN 2 ; bytes between data elements at this level of FFT
count RN 3 ; (elements per block<<16) | (blocks per stage)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; GENERAL STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The GENERAL STAGE macros "GS_RAD<R>" have the following interface.
;
; To describe the arguments, suppose this routine is called as stage j
; in a k-stage FFT with N=R1*R2*...*Rk. This stage is radix R=Rj.
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage consisting
; of complex numbers of size (1<<coeflog) bytes each in some
; stage dependent format.
; The format currently used is described in full in the
; ReadMe file in the tables subdirectory.
; r1 = dptr => points to the working buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => number of bytes per "block" or "Group" in the last stage:
; dinc = (R1*R2*...*R(j-1))<<datalog
; r3 = count => number of blocks or Groups in the last stage:
; count = Rj*R(j+1)*...*Rk
; NB dinc*count = N<<datalog
;
; ACTION:
; The routine should
; (1) Twiddle the input data
; (2) Perform a radix R stage on the data
; (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
; r0 = cptr => Updated to the end of the coefficients for the stage
; (the coefficients for the next stage will usually follow)
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage:
; dinc = (R1*R2*..*Rj)<<datalog = (input dinc)*R
; r3 = count => number of radix-R blocks or groups processed in this stage
; count = R(j+1)*...*Rk = (input count)/R
; r0,r4-r12,r14 corrupted
; Register aliases for the general stage interface (r0-r3 as described above)
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; bytes per block/group at this stage
count RN 3 ; number of blocks/groups at this stage
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; LAST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The LAST STAGE macros "LS_RAD<R>" have the following interface.
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage consisting
; of complex numbers of size (1<<coeflog) bytes each in some
; stage dependent format.
; The format currently used is described in full in the
; ReadMe file in the tables subdirectory.
; There is a possible stride between the coefficients
; specified by cinc
; r1 = dptr => points to the working buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => number of bytes per "block" or "Group" in the last stage:
; dinc = (N/R)<<datalog
; r3 = cinc => Bytes between twiddle values in the array pointed to by cptr
;
; ACTION:
; The routine should
; (1) Twiddle the input data
; (2) Perform a (last stage optimised) radix R stage on the data
; (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
; r0 = cptr => Updated to point to real-to-complex conversion coefficients
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage:
; dinc = N<<datalog = (input dinc)*R
; r0,r4-r12,r14 corrupted
; Register aliases for the last stage interface; r3 is cinc here, where
; the general stage uses it as count.
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; bytes per block/group at this stage
cinc RN 3 ; stride between twiddle coefficients in bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; COMPLEX TO REAL CONVERSION INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The COMPLEX TO REAL macros "LS_ZTOR" have the following interface.
;
; Suppose that 'w' is the N'th root of unity being used for the real FFT
; (usually exp(-2*pi*i/N) for forward transforms and exp(+2*pi*i/N) for
; the inverse transform).
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage
; This consists of (1,w,w^2,w^3,...,w^(N/4-1)).
; There is a stride between each coefficient specified by cinc
; r1 = dptr => points to the working buffer consisting of N/2 complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => (N/2)<<datalog, the size of the complex buffer in bytes
; r3 = cinc => Bytes between twiddle value in array pointed to by cptr
; r4 = dout => Output buffer (usually the same as dptr)
;
; ACTION:
; The routine should take the output of an N/2 point complex FFT and convert
; it to the output of an N point real FFT, assuming that the real input
; inputs were packed up into the real,imag,real,imag,... buffers of the complex
; input. The output is N/2 complex numbers of the form:
; y[0]+i*y[N/2], y[1], y[2], ..., y[N/2-1]
; where y[0],...,y[N-1] is the output from a complex transform of the N
; real inputs.
;
; ON EXIT:
; r0-r12,r14 corrupted
; Register aliases for the complex-to-real conversion interface (r0-r4)
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; (N/2)<<datalog, the size of the data in bytes
cinc RN 3 ; bytes between twiddle values in the coefficient buffer
dout RN 4 ; address to write the output (normally the same as dptr)
;;;;;;;;;;;;;;;;;;;;;; END OF INTERFACES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Working register aliases shared by the stage macros.
; The commented-out lines repeat aliases already defined in the
; interface sections above and are kept only as a reminder of the
; register map.
; first stage/outer loop level
;inptr RN 0
;dptr RN 1
;N RN 2 ; size of FFT
;dinc RN 2 ; bytes between block size when bit reversed (scaling of N)
bitrev RN 3 ; shares r3 with count/cinc
; inner loop level
;cptr RN 0 ; coefficient pointer for this level
;dptr RN 1 ; data pointer - points to end (load in reverse order)
;dinc RN 2 ; bytes between data elements at this level of FFT
;count RN 3 ; (elements per block<<16) | (blocks per stage)
; data registers: four complex values as (real, imaginary) pairs in r4-r11
x0r RN 4
x0i RN 5
x1r RN 6
x1i RN 7
x2r RN 8
x2i RN 9
x3r RN 10
x3i RN 11
; scratch pair (r13 is skipped)
t0 RN 12 ; these MUST be in correct order (t0<t1) for STM's
t1 RN 14
; SETREG - bind a complex "virtual register" name to a register pair.
; Declares the global string variables <prefix>r and <prefix>i and sets
; them to the text of $v0 and $v1, so later code can write $<prefix>r
; and $<prefix>i in place of the real register names.
MACRO
SETREG $prefix,$v0,$v1
GBLS $prefix.r
GBLS $prefix.i
$prefix.r SETS "$v0"
$prefix.i SETS "$v1"
MEND
; SETREGS - bind four complex names <prefix>0..<prefix>3 in one call.
; $v0..$v7 are taken pairwise as (real, imaginary) and passed to SETREG.
MACRO
SETREGS $prefix,$v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
SETREG $prefix.0,$v0,$v1
SETREG $prefix.1,$v2,$v3
SETREG $prefix.2,$v4,$v5
SETREG $prefix.3,$v6,$v7
MEND
; SET2REGS - two-pair variant of SETREGS: binds <prefix>0 and <prefix>1.
MACRO
SET2REGS $prefix,$v0,$v1,$v2,$v3
SETREG $prefix.0,$v0,$v1
SETREG $prefix.1,$v2,$v3
MEND
; LOADCOEFS - fetch the next three complex twiddle coefficients.
;   $cp                    = coefficient pointer; left pointing just past
;                            the three coefficients that were read
;   $c0r,$c0i .. $c2r,$c2i = destination registers (real, imaginary)
; The access width follows the global $coefformat: "W" reads one word
; per scalar, "H" one sign-extended halfword per scalar. Any other value
; reaches the ERROR line. Customise here if the coefficient format changes.
        MACRO
        LOADCOEFS $cp, $c0r, $c0i, $c1r, $c1i, $c2r, $c2i
        IF "$coefformat"="W"
        ; all six scalars in one load-multiple with writeback
        LDMIA   $cp!, {$c0r, $c0i, $c1r, $c1i, $c2r, $c2i}
        ELSE
        IF "$coefformat"="H"
        ; six halfword loads, each stepping the pointer by 2 bytes
        LDRSH   $c0r, [$cp], #2
        LDRSH   $c0i, [$cp], #2
        LDRSH   $c1r, [$cp], #2
        LDRSH   $c1i, [$cp], #2
        LDRSH   $c2r, [$cp], #2
        LDRSH   $c2i, [$cp], #2
        ELSE
        ERROR "Unsupported coeficient format: $coefformat"
        ENDIF
        ENDIF
        MEND
; LOADCOEF - fetch a single complex twiddle coefficient.
;   $cp      = coefficient pointer
;   $ci      = post-index applied to $cp by the real-part load
;   $re, $im = destination registers
; The imaginary part is read first, from a fixed offset, so that the
; real-part load can carry the pointer update. Width follows the global
; $coefformat ("W" = words, "H" = sign-extended halfwords).
        MACRO
        LOADCOEF $cp, $ci, $re, $im
        IF "$coefformat"="W"
        LDR     $im, [$cp, #4]
        LDR     $re, [$cp], $ci
        ELSE
        IF "$coefformat"="H"
        LDRSH   $im, [$cp, #2]
        LDRSH   $re, [$cp], $ci
        ELSE
        ERROR "Unsupported coeficient format: $coefformat"
        ENDIF
        ENDIF
        MEND
; LOADCOEFR - fetch only the first scalar stored at a coefficient address.
;   $cp = address to read from (not modified; no post-index is applied)
;   $re = destination register
; Width follows the global $coefformat ("W" = word, "H" = sign-extended
; halfword).
        MACRO
        LOADCOEFR $cp, $re
        IF "$coefformat"="W"
        LDR     $re, [$cp]
        ELSE
        IF "$coefformat"="H"
        LDRSH   $re, [$cp]
        ELSE
        ERROR "Unsupported coeficient format: $coefformat"
        ENDIF
        ENDIF
        MEND
; LOADDATAF - load one complex data element using an explicit format.
;   $dp      = address of the element
;   $di      = post-index applied to $dp by the real-part load
;   $re, $im = destination registers
;   $format  = "W" (word per scalar) or "H" (sign-extended halfword)
; As with the coefficient loads, the imaginary part is read first so the
; second load can update the pointer.
        MACRO
        LOADDATAF $dp, $di, $re, $im, $format
        IF "$format"="W"
        LDR     $im, [$dp, #4]
        LDR     $re, [$dp], $di
        ELSE
        IF "$format"="H"
        LDRSH   $im, [$dp, #2]
        LDRSH   $re, [$dp], $di
        ELSE
        ERROR "Unsupported load format: $format"
        ENDIF
        ENDIF
        MEND
; LOADDATAZ - load one complex element from the input array without
; moving the pointer.
;   $dp = address of the element (left unchanged)
;   $re = register receiving the real part
;   $im = register receiving the imaginary part
; Width follows the global $datainformat: "W" reads two words with a
; single LDMIA, "H" reads two sign-extended halfwords.
; For "W", LDMIA fills the lower-numbered register from the lower
; address, so $re must be a lower-numbered register than $im.
        MACRO
        LOADDATAZ $dp, $re, $im
        IF "$datainformat"="W"
        LDMIA   $dp, {$re,$im}
        MEXIT
        ENDIF
        IF "$datainformat"="H"
        LDRSH   $im, [$dp, #2]
        LDRSH   $re, [$dp]
        MEXIT
        ENDIF
        ; Report the variable that was actually tested; this macro has no
        ; $format parameter, so the old message could not show the bad value.
        ERROR "Unsupported load format: $datainformat"
        MEND
; Load a complex data element from the working array
; Thin wrapper: forwards to LOADDATAF with the working-buffer format
; held in the global $dataformat.
MACRO
LOADDATA $dp, $di, $re, $im
LOADDATAF $dp, $di, $re, $im, $dataformat
MEND
; Load a complex data element from the input array
; Thin wrapper: forwards to LOADDATAF with the input-buffer format
; held in the global $datainformat.
MACRO
LOADDATAI $dp, $di, $re, $im
LOADDATAF $dp, $di, $re, $im, $datainformat
MEND
; LOADDATA4 - load four consecutive complex elements from the input
; array and advance $dp past them.
;   $dp             = input pointer (updated)
;   $re0,$im0 .. $re3,$im3 = destination registers
; For word data all eight scalars are fetched with one LDMIA (so the
; registers must be in ascending order); otherwise four LOADDATAI calls
; are used, each stepping the pointer by one element.
; NOTE(review): the non-word path steps by 1<<datalog although it reads
; the input array, whose element size is described as 1<<datainlog. The
; two are equal in every configuration FFT_OPTIONS_STRING sets up.
MACRO
LOADDATA4 $dp, $re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3
IF "$datainformat"="W"
LDMIA $dp!, {$re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3}
ELSE
LOADDATAI $dp, #1<<$datalog, $re0,$im0
LOADDATAI $dp, #1<<$datalog, $re1,$im1
LOADDATAI $dp, #1<<$datalog, $re2,$im2
LOADDATAI $dp, #1<<$datalog, $re3,$im3
ENDIF
MEND
; Shift data after load
;   $dr = register to shift (always shifted when a shift is configured)
;   $di = optional second register; skipped when the argument is empty
; The shift applied is the text held in the global $postldshift (for
; example ",ASR #n" as built by SETSHIFT). When that string is empty the
; macro emits no instructions at all.
MACRO
SHIFTDATA $dr, $di
IF "$postldshift"<>""
IF "$di"<>""
MOV $di, $di $postldshift
ENDIF
MOV $dr, $dr $postldshift
ENDIF
MEND
; STORE - write one complex value to the working/output buffer with a
; caller-chosen pointer step.
;   $dp      = destination pointer
;   $di      = post-index applied to $dp by the real-part store
;   $re, $im = registers holding the value
; The imaginary part is written first at a fixed offset so the second
; store can carry the pointer update. Width follows the global
; $dataformat ("W" = words, "H" = halfwords).
        MACRO
        STORE   $dp, $di, $re, $im
        IF "$dataformat"="W"
        STR     $im, [$dp, #4]
        STR     $re, [$dp], $di
        ELSE
        IF "$dataformat"="H"
        STRH    $im, [$dp, #2]
        STRH    $re, [$dp], $di
        ELSE
        ERROR "Unsupported save format: $dataformat"
        ENDIF
        ENDIF
        MEND
; STOREP - write one complex value and step the pointer to the next
; packed element.
;   $dp      = destination pointer (advanced by one complex element)
;   $re, $im = registers holding the value
; Word data uses a single STMIA with writeback, so $re must be the
; lower-numbered register; halfword data uses two STRH and steps the
; pointer by 4 bytes.
        MACRO
        STOREP  $dp, $re, $im
        IF "$dataformat"="W"
        STMIA   $dp!, {$re,$im}
        ELSE
        IF "$dataformat"="H"
        STRH    $im, [$dp, #2]
        STRH    $re, [$dp], #4
        ELSE
        ERROR "Unsupported save format: $dataformat"
        ENDIF
        ENDIF
        MEND
; STORE3P - write three consecutive complex values and advance the
; pointer past them.
;   $dp                  = destination pointer (updated)
;   $re0,$im0 .. $re2,$im2 = registers holding the values
; Word data goes out in one STMIA (registers must be in ascending
; order); halfword data is written pair by pair, 4 bytes per element.
        MACRO
        STORE3P $dp, $re0, $im0, $re1, $im1, $re2, $im2
        IF "$dataformat"="W"
        STMIA   $dp!, {$re0,$im0, $re1,$im1, $re2,$im2}
        ELSE
        IF "$dataformat"="H"
        STRH    $im0, [$dp, #2]
        STRH    $re0, [$dp], #4
        STRH    $im1, [$dp, #2]
        STRH    $re1, [$dp], #4
        STRH    $im2, [$dp, #2]
        STRH    $re2, [$dp], #4
        ELSE
        ERROR "Unsupported save format: $dataformat"
        ENDIF
        ENDIF
        MEND
; do different command depending on forward/inverse FFT
;   $for = opcode wanted for the forward transform
;   $bac = opcode wanted for the inverse transform
;   $d, $s1, $s2 = destination and the two source operands
;   $shift = optional shifter text for $s2 (omitted when empty)
; In this copy of the file the choice is fixed: $bac is always emitted
; and $for is ignored.
MACRO
DOi $for, $bac, $d, $s1, $s2, $shift
IF "$shift"=""
$bac $d, $s1, $s2
ELSE
$bac $d, $s1, $s2, $shift
ENDIF
MEND
; d = s1 + s2 if w=exp(+2*pi*i/N) j=+i - inverse transform
; d = s1 - s2 if w=exp(-2*pi*i/N) j=-i - forward transform
; Passes (SUB, ADD) to DOi as the (forward, inverse) opcodes; with the
; DOi defined in this file that always assembles as ADD.
MACRO
ADDi $d, $s1, $s2, $shift
DOi SUB, ADD, $d, $s1, $s2, $shift
MEND
; d = s1 - s2 if w=exp(+2*pi*i/N) j=+i - inverse transform
; d = s1 + s2 if w=exp(-2*pi*i/N) j=-i - forward transform
; Passes (ADD, SUB) to DOi as the (forward, inverse) opcodes; with the
; DOi defined in this file that always assembles as SUB.
MACRO
SUBi $d, $s1, $s2, $shift
DOi ADD, SUB, $d, $s1, $s2, $shift
MEND
; check that $val is in the range -$max to +$max-1
; set carry flag (sticky) if not (2 cycles)
; has the advantage of not needing a separate register
; to store the overflow state
;   $val = value to test (unchanged)
;   $tmp = scratch register (overwritten)
;   $max = bound, as a CMP second operand
; The EOR folds a negative $val onto its one's complement, so a single
; unsigned compare against $max covers both ends of the range. The
; compare is conditional on carry clear, which is what makes the flag
; sticky: once an earlier check has set carry, later checks do not
; clear it. Presumably the caller clears carry before the first check.
MACRO
CHECKOV $val, $tmp, $max
EOR $tmp, $val, $val, ASR#31
CMPCC $tmp, $max
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Macro's to perform the twiddle stage (complex multiply by coefficient)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The coefficients are stored in different formats according to the
; precision and processor architecture. The coefficients required
; will be of the form:
;
; c(k) = cos( + k*2*pi/N ), s(k) = sin( + k*2*pi/N )
;
; c(k) + i*s(k) = exp(+2*pi*k*i/N)
;
; for some k's. The storage formats are:
;
; Format Data
; Q14S (c-s, s) in Q14 format, 16-bits per real
; Q14R (c, s) in Q14 format, 16-bits per real
; Q30S (c-s, s) in Q30 format, 32-bits per real
;
; The operation to be performed is one of:
;
; a+i*b = (x+i*y)*(c-i*s) => forward transform
; OR a+i*b = (x+i*y)*(c+i*s) => inverse transform
;
; For the R format the operation is quite simple - requiring 4 muls
; and 2 adds:
;
; Forward: a = x*c+y*s, b = y*c-x*s
; Inverse: a = x*c-y*s, b = y*c+x*s
;
; For the S format the operations is more complex but only requires
; three multiplies, and is simpler to schedule:
;
; Forward: a = (y-x)*s + x*(c+s) = x*(c-s) + (x+y)*s
; b = (y-x)*s + y*(c-s) = y*(c+s) - (x+y)*s
;
; Inverse: a = (x-y)*s + x*(c-s)
; b = (x-y)*s + y*(c+s)
;
; S advantage 16bit: 1ADD, 1SUB, 1MUL, 2MLA instead of 1SUB, 3MUL, 1MLA
; S advantage 32bit: 2ADD, 1SUB, 2SMULL, 1SMLAL instead of 1RSB, 2SMULL, 2SMLAL
; So S wins except for a very fast multiplier (eg 9E)
;
; NB The coefficients must always be the second operand on processor that
; take a variable number of cycles per multiply - so the FFT time remains constant
; This twiddle takes unpacked real and imaginary values
; Expects (cr,ci) = (c-s,s) on input
; Sets (cr,ci) = (a,b) on output
; TWIDDLE - complex multiply of a data value by an S-format coefficient
; using three multiplies.
;   $xr, $xi = data real/imaginary parts (x, y)
;   $cr, $ci = (c-s, s) on entry; (a, b) on exit
;   $t0, $t1 = scratch registers (overwritten)
; Both branches compute a = x*(c-s) + (x-y)*s and b = y*(c+s) + (x-y)*s,
; i.e. the "Inverse" S-format formulas listed above.
; The first branch (0 <= qshift < 32) uses 32-bit MUL/MLA. The second
; uses 64-bit SMULL/SMLAL and keeps the high words in $cr/$ci; it also
; overwrites $xr with x-y.
MACRO
TWIDDLE $xr, $xi, $cr, $ci, $t0, $t1
IF qshift>=0 :LAND: qshift<32
SUB $t1, $xr, $xi ; x-y
MUL $t0, $t1, $ci ; (x-y)*s
ADD $ci, $cr, $ci, LSL #1 ; ci = c+s allow mul to finish on SA
MLA $cr, $xr, $cr, $t0 ; a
MLA $ci, $xi, $ci, $t0 ; b
ELSE
ADD $t1, $cr, $ci, LSL #1 ; c+s
SMULL $t0, $cr, $xr, $cr ; x*(c-s)
SUB $xr, $xr, $xi ; x-y + allow mul to finish on SA
SMULL $t0, $ci, $xr, $ci ; (x-y)*s
ADD $cr, $cr, $ci ; a + allow mul to finish on SA
SMLAL $t0, $ci, $xi, $t1 ; b
ENDIF
MEND
; The following twiddle variant is similar to the above
; except that it is for an "E" processor variant. A standard
; 4 multiply twiddle is used as it requires the same number
; of cycles and needs less intermediate precision
;
; $c0 = coefficient real and imaginary (c,s) (packed: c in the bottom
;       halfword, s in the top halfword)
; $xx = input data real and imaginary part (packed: real in the bottom
;       halfword, imaginary in the top halfword)
; $xxi = optional; when given, the imaginary input is taken from the
;        bottom halfword of this register instead of the top of $xx
; $t0 = scratch register (overwritten)
;
; $xr = destination register for real part of product
; $xi = destination register for imaginary part of product
;
; All registers should be distinct
;
; Result: $xr = x*c - y*s, $xi = x*s + y*c (the "Inverse" R-format
; operation listed above).
MACRO
TWIDDLE_E $xr, $xi, $c0, $t0, $xx, $xxi
SMULBB $t0, $xx, $c0 ; x*c
SMULBT $xi, $xx, $c0 ; x*s
IF "$xxi"=""
SMULTT $xr, $xx, $c0 ; y*s
SMLATB $xi, $xx, $c0, $xi ; x*s + y*c
ELSE
SMULBT $xr, $xxi, $c0 ; y*s
SMLABB $xi, $xxi, $c0, $xi ; x*s + y*c
ENDIF
SUB $xr, $t0, $xr ; x*c - y*s
MEND
; Scale data value in by the coefficient, writing result to out
; The coefficient must be the second multiplicand
; The post mul shift need not be done so in most cases this
; is just a multiply (unless you need higher precision)
; coef must be preserved
;   $out  = destination register
;   $in   = data value
;   $coef = coefficient (not modified by either branch)
;   $tmp  = scratch; only written in the SMULL branch, where it receives
;           the low word and $out the high word of the 64-bit product
MACRO
SCALE $out, $in, $coef, $tmp
IF qshift>=0 :LAND: qshift<32
MUL $out, $in, $coef
ELSE
SMULL $tmp, $out, $in, $coef
ENDIF
MEND
; DECODEFORMAT - record a storage format under the name prefix $out.
;   $out    = prefix; the macro declares the global strings <out>log
;             and <out>format
;   $format = "B", "H" or "W"
; <out>format is set to the format letter itself and <out>log to the
; string "1", "2" or "3" respectively. Any other letter reaches the
; ERROR line, leaving <out>log at its previous value.
        MACRO
        DECODEFORMAT $out, $format
        GBLS    $out.log
        GBLS    $out.format
$out.format SETS "$format"
        IF "$format"="B"
$out.log SETS "1"
        ELSE
        IF "$format"="H"
$out.log SETS "2"
        ELSE
        IF "$format"="W"
$out.log SETS "3"
        ELSE
        ERROR "Unrecognised format for $out: $format"
        ENDIF
        ENDIF
        ENDIF
        MEND
; generate a string in $var of the correct right shift
; amount - negative values = left shift
;   $var   = name of a string variable to receive the shifter text; it is
;            not declared here, so the caller must have declared it
;   $value = arithmetic expression giving the right-shift amount
; Result: ",ASR #n" for 0 < value < 32, ",LSL #n" for -32 < value < 0,
; and the empty string otherwise (including value = 0). The leading
; comma lets the string be appended directly after a register operand.
MACRO
SETSHIFT $var, $value
LCLA svalue
svalue SETA $value
$var SETS ""
IF svalue>0 :LAND: svalue<32
$var SETS ",ASR #0x$svalue"
ENDIF
; negate and repeat the range test to detect a left shift
svalue SETA -svalue
IF svalue>0 :LAND: svalue<32
$var SETS ",LSL #0x$svalue"
ENDIF
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; CODE to decipher the FFT options ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The $flags variable specifies the FFT options
; The global string $name is set to a textual version
; The global string tablename is declared here but is not assigned by
; this macro.
;
; Besides $name, the macro declares and fills in the global assembly
; variables listed below; the stage macros read them to choose data
; widths, shifts and code paths.
MACRO
FFT_OPTIONS_STRING $flags, $name
GBLS $name
GBLS qname ; name of the precision (eg Q14, Q30)
GBLS direction ; name of the direction (eg I, F)
GBLS radix ; name of the radix (2, 4E, 4B, 4O etc)
GBLS intype ; name of input data type (if real)
GBLS prescale ; flag to indicate prescale
GBLS outpos ; position for the output data
GBLS datainformat ; bytes per input data item
GBLS dataformat ; bytes per working item
GBLS coefformat ; bytes per coefficient working item
GBLS coeforder ; R=(c,s) S=(c-s,s) storage format
GBLA datainlog ; shift to bytes per input complex
GBLA datalog ; shift to bytes per working complex
GBLA coeflog ; shift to bytes per coefficient complex
GBLA qshift ; right shift after multiply
GBLA norm
GBLA architecture ; 4=Arch4(7TDMI,SA), 5=Arch5TE(ARM9E)
GBLS cdshift
GBLS postmulshift
GBLS postldshift
GBLS postmulshift1
GBLS postldshift1
GBLL reversed ; flag to indicate input is already bit reversed
GBLS tablename
; find what sort of processor we are building the FFT for
architecture SETA 4 ; Architecture 4 (7TDMI, StrongARM etc)
;qname SETS {CPU}
; P $qname
; Select 5 when {ARCHITECTURE} starts with "5TE" or "6". The appended
; "a" padding presumably keeps :LEFT: valid for short strings.
IF ((({ARCHITECTURE}:CC:"aaaa"):LEFT:3="5TE") :LOR: (({ARCHITECTURE}:CC:"aa"):LEFT:1="6"))
architecture SETA 5 ; Architecture 5 (ARM9E, E extensions)
; P arch E
ENDIF
reversed SETL {FALSE}
; decode input order
IF ($flags:AND:FFT_INPUTORDER)=FFT_REVERSED
reversed SETL {TRUE}
ENDIF
; decode radix type to $radix
; (radix keeps its previous value if none of the three codes match)
IF ($flags:AND:FFT_RADIX)=FFT_RADIX4
radix SETS "4E"
ENDIF
IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_8F
radix SETS "4O"
ENDIF
IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_2L
radix SETS "4B"
ENDIF
; decode direction to $direction
; (fixed to "I" in this copy; the FFT_DIRECTION bit is not examined)
direction SETS "I"
; decode data size to $qname, and *log's
IF ($flags:AND:FFT_DATA_SIZES)=FFT_32bit
qname SETS "Q30"
datainlog SETA 3 ; 8 bytes per complex
datalog SETA 3
coeflog SETA 3
datainformat SETS "W"
dataformat SETS "W"
coefformat SETS "W"
qshift SETA -2 ; shift left top word of 32 bit result
ENDIF
IF ($flags:AND:FFT_DATA_SIZES)=FFT_16bit
qname SETS "Q14"
datainlog SETA 2 ; 4 bytes per complex
datalog SETA 2
coeflog SETA 2
datainformat SETS "H"
dataformat SETS "H"
coefformat SETS "H"
qshift SETA 14
ENDIF
; find the coefficient ordering
; "S" by default; "R" on architecture 5 and above when qshift is below 16
coeforder SETS "S"
IF (architecture>=5):LAND:(qshift<16)
coeforder SETS "R"
ENDIF
; decode real vs complex input data type
intype SETS ""
IF ($flags:AND:FFT_INPUTTYPE)=FFT_REAL
intype SETS "R"
ENDIF
; decode on outpos
outpos SETS ""
IF ($flags:AND:FFT_OUTPUTPOS)=FFT_OUT_INBUF
outpos SETS "I"
ENDIF
; decode on prescale
prescale SETS ""
IF ($flags:AND:FFT_INPUTSCALE)=FFT_PRESCALE
prescale SETS "P"
ENDIF
; decode on output scale
norm SETA 1
IF ($flags:AND:FFT_OUTPUTSCALE)=FFT_NONORM
norm SETA 0
ENDIF
; calculate shift to convert data offsets to coefficient offsets
SETSHIFT cdshift, ($datalog)-($coeflog)
; textual name: radix, direction, precision, then the optional
; real-input, output-position and prescale letters
$name SETS "$radix$direction$qname$intype$outpos$prescale"
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; FFT GENERATOR ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; FFT options bitfield
; Each group below starts with the mask that selects one field of the
; flags word, followed by the values that field can take. The option
; decoder tests (flags AND mask) against those values.
FFT_DIRECTION EQU 0x00000001 ; direction select bit
FFT_FORWARD EQU 0x00000000 ; forward exp(-ijkw) coefficient FFT
FFT_INVERSE EQU 0x00000001 ; inverse exp(+ijkw) coefficient FFT
FFT_INPUTORDER EQU 0x00000002 ; input order select field
FFT_BITREV EQU 0x00000000 ; input data is in normal order (needs bit reversing)
FFT_REVERSED EQU 0x00000002 ; assume input data is already bit reversed
FFT_INPUTSCALE EQU 0x00000004 ; select scale on input data
FFT_NOPRESCALE EQU 0x00000000 ; do not scale input data
FFT_PRESCALE EQU 0x00000004 ; scale input data up by a register amount
FFT_INPUTTYPE EQU 0x00000010 ; selector for real/complex input data
FFT_COMPLEX EQU 0x00000000 ; do complex FFT of N points
FFT_REAL EQU 0x00000010 ; do a 2*N point real FFT
FFT_OUTPUTPOS EQU 0x00000020 ; where is the output placed?
FFT_OUT_OUTBUF EQU 0x00000000 ; default - in the output buffer
FFT_OUT_INBUF EQU 0x00000020 ; copy it back to the input buffer
FFT_RADIX EQU 0x00000F00 ; radix select
FFT_RADIX4 EQU 0x00000000 ; radix 4 (log_2 N must be even)
FFT_RADIX4_8F EQU 0x00000100 ; radix 4 with radix 8 first stage
FFT_RADIX4_2L EQU 0x00000200 ; radix 4 with optional radix 2 last stage
FFT_OUTPUTSCALE EQU 0x00001000 ; select output scale value
FFT_NORMALISE EQU 0x00000000 ; default - divide by N during algorithm
FFT_NONORM EQU 0x00001000 ; calculate the raw sum (no scale)
FFT_DATA_SIZES EQU 0x000F0000 ; data/coefficient size select field
FFT_16bit EQU 0x00000000 ; 16-bit data and Q14 coefs
FFT_32bit EQU 0x00010000 ; 32-bit data and Q30 coefs
END

View File

@ -1,101 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fft_main.h,v
; Revision: 1.10
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
INCLUDE fft_mac_forward.h ; general macros
INCLUDE fs_rad8_forward.h ; first stage, radix 8 macros
INCLUDE gs_rad4.h ; general stage, radix 4 macros
; The macro in this file generates a whole FFT by glueing together
; FFT stage macros. It is designed to handle a range of power-of-2
; FFT's, the power of 2 set at run time.
; The following should be set up:
;
; $flags = a 32-bit integer indicating what FFT code to generate
; formed by a bitmask of the above FFT_* flag definitions
; (see fft_mac.h)
;
; r0 = inptr = address of the input buffer
; r1 = dptr = address of the output buffer
; r2 = N = the number of points in the FFT
; r3 = = optional pre-left shift to apply to the input data
;
; The contents of the input buffer are preserved (provided that the
; input and output buffer are different, which must be the case unless
; no bitreversal is required and the input is provided pre-reversed).
; GENERATE_FFT - emit a complete FFT routine body for the option word
; $flags: first stage, a loop of radix-4 general stages, and (for real
; input) the final complex-to-real conversion.
; Register arguments are as listed in the comment block above.
MACRO
GENERATE_FFT $flags
; decode the options word
FFT_OPTIONS_STRING $flags, name
IF "$outpos"<>""
; stack the input buffer address for later on
; NOTE(review): the only matching pop in this macro is in the real-FFT
; branch below. For a complex FFT with FFT_OUT_INBUF nothing here pops
; it - confirm the caller rebalances the stack.
STMFD sp!, {inptr}
ENDIF
; Do first stage - radix 4 or radix 8 depending on parity
; tablename becomes <qname><coeforder>_8 or <qname><coeforder>_4
; NOTE(review): FS_RAD4 and LS_ZTOR are not defined in the parts of the
; included files visible here - confirm where they come from.
IF "$radix"="4O"
FS_RAD8
tablename SETS "_8"
tablename SETS "$qname$coeforder$tablename"
ELSE
FS_RAD4
tablename SETS "_4"
tablename SETS "$qname$coeforder$tablename"
ENDIF
IMPORT t_$tablename
LDR cptr, =t_$tablename ; coefficient table
CMP count, #1
BEQ %FT10 ; exit for small case
12 ; General stage loop
; repeat radix-4 stages while more than 2 blocks remain
GS_RAD4
CMP count, #2
BGT %BT12
IF "$radix"="4B"
; support odd parity as well
; (the radix 2 last stage is disabled: both lines are commented out)
;BLT %FT10 ; less than 2 left (ie, finished)
;LS_RAD2 ; finish off with a radix 2 stage
ENDIF
10 ; we've finished the complex FFT
IF ($flags:AND:FFT_INPUTTYPE)=FFT_REAL
; convert to a real FFT
; output goes to the stacked input buffer when outpos is "I",
; otherwise in place over the working buffer
IF "$outpos"="I"
LDMFD sp!, {dout}
ELSE
MOV dout, dptr
ENDIF
; dinc = (N/2) << datalog where N is the number of real points
IMPORT s_$tablename
LDR t0, = s_$tablename
LDR t0, [t0] ; max N handled by the table
MOV t1, dinc, LSR #($datalog-1) ; real N we want to handle
CMP t0, t1
; coefficient stride: 1 element when N equals the table maximum,
; 3 elements otherwise
MOV cinc, #3<<$coeflog ; radix 4 table stage
MOVEQ cinc, #1<<$coeflog ; radix 4 table stage
LS_ZTOR
ENDIF
MEND
END

View File

@ -1,101 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fft_main.h,v
; Revision: 1.10
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
INCLUDE fft_mac_inverse.h ; general macros
INCLUDE fs_rad8_inverse.h ; first stage, radix 8 macros
INCLUDE gs_rad4.h ; general stage, radix 4 macros
; The macro in this file generates a whole FFT by glueing together
; FFT stage macros. It is designed to handle a range of power-of-2
; FFT's, the power of 2 set at run time.
; The following should be set up:
;
; $flags = a 32-bit integer indicating what FFT code to generate
; formed by a bitmask of the above FFT_* flag definitions
; (see fft_mac.h)
;
; r0 = inptr = address of the input buffer
; r1 = dptr = address of the output buffer
; r2 = N = the number of points in the FFT
; r3 = = optional pre-left shift to apply to the input data
;
; The contents of the input buffer are preserved (provided that the
; input and output buffer are different, which must be the case unless
; no bitreversal is required and the input is provided pre-reversed).
; GENERATE_FFT - emit a complete FFT routine body for the option word
; $flags: first stage, a loop of radix-4 general stages, and (for real
; input) the final complex-to-real conversion.
; Register arguments are as listed in the comment block above.
MACRO
GENERATE_FFT $flags
; decode the options word
FFT_OPTIONS_STRING $flags, name
IF "$outpos"<>""
; stack the input buffer address for later on
; NOTE(review): the only matching pop in this macro is in the real-FFT
; branch below. For a complex FFT with FFT_OUT_INBUF nothing here pops
; it - confirm the caller rebalances the stack.
STMFD sp!, {inptr}
ENDIF
; Do first stage - radix 4 or radix 8 depending on parity
; tablename becomes <qname><coeforder>_8 or <qname><coeforder>_4
; NOTE(review): FS_RAD4 and LS_ZTOR are not defined in the parts of the
; included files visible here - confirm where they come from.
IF "$radix"="4O"
FS_RAD8
tablename SETS "_8"
tablename SETS "$qname$coeforder$tablename"
ELSE
FS_RAD4
tablename SETS "_4"
tablename SETS "$qname$coeforder$tablename"
ENDIF
IMPORT t_$tablename
LDR cptr, =t_$tablename ; coefficient table
CMP count, #1
BEQ %FT10 ; exit for small case
12 ; General stage loop
; repeat radix-4 stages while more than 2 blocks remain
GS_RAD4
CMP count, #2
BGT %BT12
IF "$radix"="4B"
; support odd parity as well
; (the radix 2 last stage is disabled: both lines are commented out)
;BLT %FT10 ; less than 2 left (ie, finished)
;LS_RAD2 ; finish off with a radix 2 stage
ENDIF
10 ; we've finished the complex FFT
IF ($flags:AND:FFT_INPUTTYPE)=FFT_REAL
; convert to a real FFT
; output goes to the stacked input buffer when outpos is "I",
; otherwise in place over the working buffer
IF "$outpos"="I"
LDMFD sp!, {dout}
ELSE
MOV dout, dptr
ENDIF
; dinc = (N/2) << datalog where N is the number of real points
IMPORT s_$tablename
LDR t0, = s_$tablename
LDR t0, [t0] ; max N handled by the table
MOV t1, dinc, LSR #($datalog-1) ; real N we want to handle
CMP t0, t1
; coefficient stride: 1 element when N equals the table maximum,
; 3 elements otherwise
MOV cinc, #3<<$coeflog ; radix 4 table stage
MOVEQ cinc, #1<<$coeflog ; radix 4 table stage
LS_ZTOR
ENDIF
MEND
END

View File

@ -1,236 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fs_rad8.h,v
; Revision: 1.5
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; This file contains first stage, radix-8 code
; It bit reverses (assuming a power of 2 FFT) and performs the first stage
;
; FS_RAD8 - radix-8 first stage.
; Each loop pass reads 8 complex inputs (bit reversed unless "reversed"
; is set), runs three radix-2 layers on them and writes 8 outputs
; sequentially through dptr.
; Each pass uses the two helper macros: the ODD half is computed first
; and parked on the stack (8 words), then the EVEN half is computed in
; registers and combined with the stacked values two words at a time.
; On exit: count = N/8, dinc = 8<<datalog, dptr and N restored from stack.
MACRO
FS_RAD8
; shifter strings: normalisation divides by 8 (3 bits) when norm=1.
; The "...1" variants shift by one bit less, for the a' = a + b,
; b' = a' - 2b butterfly form used below.
SETSHIFT postldshift, 3*norm
SETSHIFT postmulshift, 3*norm+qshift
SETSHIFT postldshift1, 3*norm-1
SETSHIFT postmulshift1, 3*norm+qshift-1
; r3 (the prescale amount) is saved too because bitrev reuses r3
IF "$prescale"<>""
STMFD sp!, {dptr, N, r3}
ELSE
STMFD sp!, {dptr, N}
ENDIF
MOV bitrev, #0
; dinc and N are the same register: dinc = (N/4)<<datalog, the byte
; distance between the alternate elements loaded by each helper
MOV dinc, N, LSL #($datalog-2)
12 ; first (radix 8) stage loop
; do first two (radix 2) stages
FIRST_STAGE_RADIX8_ODD dinc, "dinc, LSR #1", bitrev
FIRST_STAGE_RADIX8_EVEN dinc, bitrev
; third (radix 2) stage
; h0..h3 (even results) live in x1,x2,x3,t0/t1 as set by the EVEN macro.
; Each pop of a stacked odd value lands in a pair that is already free:
; x0 first, then the pair just stored for the previous output.
LDMFD sp!, {x0r, x0i}
ADD $h0r, $h0r, x0r $postldshift ; standard add
ADD $h0i, $h0i, x0i $postldshift
SUB x0r, $h0r, x0r $postldshift1
SUB x0i, $h0i, x0i $postldshift1
STORE dptr, #1<<$datalog, $h0r, $h0i
LDMFD sp!, {x1r, x1i}
; x1 and x3 were scaled by 1/sqrt(2) in the ODD macro, hence the
; post-multiply shifts here
ADD $h1r, $h1r, x1r $postmulshift
ADD $h1i, $h1i, x1i $postmulshift
SUB x1r, $h1r, x1r $postmulshift1
SUB x1i, $h1i, x1i $postmulshift1
STORE dptr, #1<<$datalog, $h1r, $h1i
LDMFD sp!, {x2r, x2i}
SUBi $h2r, $h2r, x2r $postldshift ; note that x2r & x2i were
ADDi $h2i, $h2i, x2i $postldshift ; swapped above
ADDi x2r, $h2r, x2r $postldshift1
SUBi x2i, $h2i, x2i $postldshift1
STORE dptr, #1<<$datalog, $h2r, $h2i
LDMFD sp!, {x3r, x3i}
ADD $h3r, $h3r, x3r $postmulshift
ADD $h3i, $h3i, x3i $postmulshift
SUB x3r, $h3r, x3r $postmulshift1
SUB x3i, $h3i, x3i $postmulshift1
STORE dptr, #1<<$datalog, $h3r, $h3i
; second half of the outputs (the "difference" terms)
STORE dptr, #1<<$datalog, x0r, x0i
STORE dptr, #1<<$datalog, x1r, x1i
STORE dptr, #1<<$datalog, x2r, x2i
STORE dptr, #1<<$datalog, x3r, x3i
IF reversed
; sequential input: dinc doubles as a countdown, N/8 passes in total
SUBS dinc, dinc, #2<<$datalog
BGT %BT12
ELSE
; increment the count in a bit reverse manner
EOR bitrev, bitrev, dinc, LSR #($datalog-2+4) ; t0 = (N/8)>>1
TST bitrev, dinc, LSR #($datalog-2+4)
BNE %BT12
; get here for 1/2 the loops - carry to next bit
EOR bitrev, bitrev, dinc, LSR #($datalog-2+5)
TST bitrev, dinc, LSR #($datalog-2+5)
BNE %BT12
; get here for 1/4 of the loops - stop unrolling
MOV t0, dinc, LSR #($datalog-2+6)
15 ; bit reverse increment loop
EOR bitrev, bitrev, t0
TST bitrev, t0
BNE %BT12
; get here for 1/8 of the loops (or when finished)
MOVS t0, t0, LSR #1 ; move down to next bit
BNE %BT15 ; carry on if we haven't run off the bottom
ENDIF
; restore the registers saved on entry
IF "$prescale"<>""
LDMFD sp!, {dptr, N, r3}
ELSE
LDMFD sp!, {dptr, N}
ENDIF
MOV count, N, LSR #3 ; start with N/8 blocks 8 each
MOV dinc, #8<<$datalog ; initial skip is 8 elements
MEND
; FIRST_STAGE_RADIX8_ODD - odd-indexed half of one radix-8 butterfly.
;   $dinc      = register holding the byte stride between loaded elements
;   $dinc_lsr1 = operand text giving half that stride (offset of element 1)
;   $bitrev    = register holding the bit-reversed block index
; Loads elements 1,3,5,7 (or the second four elements when the input is
; already reversed), optionally prescales them, runs two radix-2 layers,
; applies the 1/sqrt(2) twiddles and pushes the four complex results
; (x0..x3, 8 words) on the stack for FS_RAD8 to combine.
MACRO
FIRST_STAGE_RADIX8_ODD $dinc, $dinc_lsr1, $bitrev
IF reversed
; load non bit reversed
ADD t0, inptr, #4<<$datalog
LOADDATAI t0, #1<<$datalog, x0r, x0i
LOADDATAI t0, #1<<$datalog, x1r, x1i
LOADDATAI t0, #1<<$datalog, x2r, x2i
LOADDATAI t0, #1<<$datalog, x3r, x3i
ELSE
; load data elements 1,3,5,7 into register order 1,5,3,7
ADD t0, inptr, $bitrev, LSL #$datalog
ADD t0, t0, $dinc_lsr1 ; load in odd terms first
LOADDATAI t0, $dinc, x0r, x0i
LOADDATAI t0, $dinc, x2r, x2i
LOADDATAI t0, $dinc, x1r, x1i
LOADDATAI t0, $dinc, x3r, x3i
ENDIF
IF "$prescale"="P"
; prescale amount is the third word stacked by FS_RAD8 ({dptr, N, r3})
LDR t0, [sp, #8]
MOV x0r, x0r, LSL t0
MOV x0i, x0i, LSL t0
MOV x1r, x1r, LSL t0
MOV x1i, x1i, LSL t0
MOV x2r, x2r, LSL t0
MOV x2i, x2i, LSL t0
MOV x3r, x3r, LSL t0
MOV x3i, x3i, LSL t0
ENDIF
; temporaries: h2 overlays x3, h3 uses the scratch pair t0/t1
SETREG h2, x3r, x3i
SETREG h3, t0, t1
; first stage (radix 2) butterflies
ADD x0r, x0r, x1r
ADD x0i, x0i, x1i
SUB x1r, x0r, x1r, LSL #1
SUB x1i, x0i, x1i, LSL #1
SUB $h3r, x2r, x3r
SUB $h3i, x2i, x3i
ADD $h2r, x2r, x3r
ADD $h2i, x2i, x3i
; second stage (radix 2) butterflies
SUB x2i, x0r, $h2r ; swap real and imag here
SUB x2r, x0i, $h2i ; for use later
ADD x0r, x0r, $h2r
ADD x0i, x0i, $h2i
ADDi x3r, x1r, $h3i
SUBi x3i, x1i, $h3r
SUBi x1r, x1r, $h3i
ADDi x1i, x1i, $h3r
; do the 1/sqrt(2) (+/-1 +/- i) twiddles for third stage
LCLS tempname
tempname SETS "R_rad8"
IMPORT t_$qname$tempname
LDR t1, =t_$qname$tempname
; IMPORT t_$qname.R_rad8
; LDR t1, =t_$qname.R_rad8
; t1 = first scalar of the imported table; used below as the
; 1/sqrt(2) scale factor
LOADCOEFR t1, t1
; dinc is passed to SCALE as its scratch register, so save it around
; the four scalings
STMFD sp!, {dinc} ;;; FIXME!!!
ADD t0, x1r, x1i ; real part when * (1-i)
SCALE x1r, t0, t1, dinc ; scale by 1/sqrt(2)
RSB t0, t0, x1i, LSL #1 ; imag part when * (1-i)
SCALE x1i, t0, t1, dinc ; scale by 1/sqrt(2)
SUB t0, x3i, x3r ; real part when * (-1-i)
SCALE x3r, t0, t1, dinc ; scale by 1/sqrt(2)
SUB t0, t0, x3i, LSL #1 ; imag part when * (-1-i)
SCALE x3i, t0, t1, dinc ; scale by 1/sqrt(2)
LDMFD sp!, {dinc} ;;; FIXME!!!
; park the four odd results; FS_RAD8 pops them pair by pair
STMFD sp!, {x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i}
MEND
; FIRST_STAGE_RADIX8_EVEN - even-indexed half of one radix-8 butterfly.
;   $dinc   = register holding the byte stride between loaded elements
;   $bitrev = register holding the bit-reversed block index
; Loads elements 0,2,4,6 (or the first four elements when the input is
; already reversed), optionally prescales them and runs two radix-2
; layers. Results are left in the virtual registers h0..h3 for FS_RAD8.
; Must follow FIRST_STAGE_RADIX8_ODD: the prescale lookup assumes that
; macro's 8 stacked words are on top of the stack.
MACRO
FIRST_STAGE_RADIX8_EVEN $dinc, $bitrev
; load elements 0,2,4,6 into register order 0,4,2,6
; h0..h3 map onto x1, x2, x3 and t0/t1; g3 is a temporary in x0
SETREGS h, x1r, x1i, x2r, x2i, x3r, x3i, t0, t1
SETREG g3, x0r, x0i
IF reversed
; load normally
LOADDATAI inptr, #1<<$datalog, $h0r, $h0i
LOADDATAI inptr, #1<<$datalog, $h1r, $h1i
LOADDATAI inptr, #1<<$datalog, $h2r, $h2i
LOADDATAI inptr, #1<<$datalog, $h3r, $h3i
; skip the four elements already consumed by the ODD macro
ADD inptr, inptr, #4<<$datalog
ELSE
; load bit reversed
ADD x0r, inptr, $bitrev, LSL #$datalog
LOADDATAI x0r, $dinc, $h0r, $h0i
LOADDATAI x0r, $dinc, $h2r, $h2i
LOADDATAI x0r, $dinc, $h1r, $h1i
LOADDATAI x0r, $dinc, $h3r, $h3i
ENDIF
IF "$prescale"="P"
LDR x0r, [sp, #8+32] ; NB we've stacked 8 extra regs!
MOV $h0r, $h0r, LSL x0r
MOV $h0i, $h0i, LSL x0r
MOV $h1r, $h1r, LSL x0r
MOV $h1i, $h1i, LSL x0r
MOV $h2r, $h2r, LSL x0r
MOV $h2i, $h2i, LSL x0r
MOV $h3r, $h3r, LSL x0r
MOV $h3i, $h3i, LSL x0r
ENDIF
; only h0 is shifted explicitly; the other terms pick up the same
; shift through the shifted operands below
SHIFTDATA $h0r, $h0i
; first stage (radix 2) butterflies
ADD $h0r, $h0r, $h1r $postldshift
ADD $h0i, $h0i, $h1i $postldshift
SUB $h1r, $h0r, $h1r $postldshift1
SUB $h1i, $h0i, $h1i $postldshift1
SUB $g3r, $h2r, $h3r
SUB $g3i, $h2i, $h3i
ADD $h2r, $h2r, $h3r
ADD $h2i, $h2i, $h3i
; second stage (radix 2) butterflies
ADD $h0r, $h0r, $h2r $postldshift
ADD $h0i, $h0i, $h2i $postldshift
SUB $h2r, $h0r, $h2r $postldshift1
SUB $h2i, $h0i, $h2i $postldshift1
ADDi $h3r, $h1r, $g3i $postldshift
SUBi $h3i, $h1i, $g3r $postldshift
SUBi $h1r, $h1r, $g3i $postldshift
ADDi $h1i, $h1i, $g3r $postldshift
MEND
END

View File

@ -1,236 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fs_rad8.h,v
; Revision: 1.5
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; This file contains first stage, radix-8 code
; It bit reverses (assuming a power of 2 FFT) and performs the first stage
;
; FS_RAD8 - first-stage, radix-8 pass of the FFT.
;
; Each pass of loop 12 handles eight complex inputs: the
; FIRST_STAGE_RADIX8_ODD and FIRST_STAGE_RADIX8_EVEN macros each run two
; radix-2 stages on four inputs, and the code here performs the third
; radix-2 stage and writes the eight results through dptr.
; On exit count = N/8 and dinc = 8<<datalog; dptr and N (and r3 when a
; prescale is in use) are reloaded from the frame pushed on entry.
; datalog, prescale, norm, qshift and reversed come from the surrounding
; assembly context.
; NOTE(review): SETSHIFT, STORE, ADDi and SUBi are defined outside this
; chunk (presumably fft_mac.h) - confirm their exact semantics there.
MACRO
FS_RAD8
; the ...1 variants are set up with a shift count one less than their
; counterparts
SETSHIFT postldshift, 3*norm
SETSHIFT postmulshift, 3*norm+qshift
SETSHIFT postldshift1, 3*norm-1
SETSHIFT postmulshift1, 3*norm+qshift-1
; save the registers that are restored at the end of the macro
IF "$prescale"<>""
STMFD sp!, {dptr, N, r3}
ELSE
STMFD sp!, {dptr, N}
ENDIF
MOV bitrev, #0
; dinc = N << (datalog-2): N/4 elements expressed in bytes
; (one element = 1<<datalog bytes)
MOV dinc, N, LSL #($datalog-2)
12 ; first (radix 8) stage loop
; do first two (radix 2) stages
; the ODD macro ends by pushing its eight result registers; they are
; popped pair by pair below
FIRST_STAGE_RADIX8_ODD dinc, "dinc, LSR #1", bitrev
FIRST_STAGE_RADIX8_EVEN dinc, bitrev
; third (radix 2) stage
; each group: pop one stacked pair, overwrite h with the sum, then form
; the other output in x from the new h using the ...shift1 operand form
LDMFD sp!, {x0r, x0i}
ADD $h0r, $h0r, x0r $postldshift ; standard add
ADD $h0i, $h0i, x0i $postldshift
SUB x0r, $h0r, x0r $postldshift1
SUB x0i, $h0i, x0i $postldshift1
STORE dptr, #1<<$datalog, $h0r, $h0i
; the x1 and x3 pairs went through SCALE in the ODD macro, so they use
; the postmulshift forms (which include qshift)
LDMFD sp!, {x1r, x1i}
ADD $h1r, $h1r, x1r $postmulshift
ADD $h1i, $h1i, x1i $postmulshift
SUB x1r, $h1r, x1r $postmulshift1
SUB x1i, $h1i, x1i $postmulshift1
STORE dptr, #1<<$datalog, $h1r, $h1i
LDMFD sp!, {x2r, x2i}
SUBi $h2r, $h2r, x2r $postldshift ; note that x2r & x2i were
ADDi $h2i, $h2i, x2i $postldshift ; swapped above
ADDi x2r, $h2r, x2r $postldshift1
SUBi x2i, $h2i, x2i $postldshift1
STORE dptr, #1<<$datalog, $h2r, $h2i
LDMFD sp!, {x3r, x3i}
ADD $h3r, $h3r, x3r $postmulshift
ADD $h3i, $h3i, x3i $postmulshift
SUB x3r, $h3r, x3r $postmulshift1
SUB x3i, $h3i, x3i $postmulshift1
STORE dptr, #1<<$datalog, $h3r, $h3i
; the four x results are stored after the four h results
STORE dptr, #1<<$datalog, x0r, x0i
STORE dptr, #1<<$datalog, x1r, x1i
STORE dptr, #1<<$datalog, x2r, x2i
STORE dptr, #1<<$datalog, x3r, x3i
IF reversed
; dinc doubles as the loop counter on this path: it drops by two
; elements per pass, so starting from N/4 elements the loop runs N/8 times
SUBS dinc, dinc, #2<<$datalog
BGT %BT12
ELSE
; increment the count in a bit reverse manner
; dinc, LSR #(datalog-2+4) is N>>4; toggle that bit of bitrev and loop
; again if the bit is now set, otherwise carry into the next lower bit
EOR bitrev, bitrev, dinc, LSR #($datalog-2+4) ; t0 = (N/8)>>1
TST bitrev, dinc, LSR #($datalog-2+4)
BNE %BT12
; get here for 1/2 the loops - carry to next bit
EOR bitrev, bitrev, dinc, LSR #($datalog-2+5)
TST bitrev, dinc, LSR #($datalog-2+5)
BNE %BT12
; get here for 1/4 of the loops - stop unrolling
; t0 holds the single bit currently being toggled, starting at N>>6
MOV t0, dinc, LSR #($datalog-2+6)
15 ; bit reverse increment loop
EOR bitrev, bitrev, t0
TST bitrev, t0
BNE %BT12
; get here for 1/8 of the loops (or when finished)
MOVS t0, t0, LSR #1 ; move down to next bit
BNE %BT15 ; carry on if we haven't run off the bottom
ENDIF
; restore what was pushed on entry
IF "$prescale"<>""
LDMFD sp!, {dptr, N, r3}
ELSE
LDMFD sp!, {dptr, N}
ENDIF
MOV count, N, LSR #3 ; start with N/8 blocks 8 each
MOV dinc, #8<<$datalog ; initial skip is 8 elements
MEND
; FIRST_STAGE_RADIX8_ODD dinc, dinc_lsr1, bitrev
;
; Loads four complex input elements into x0..x3, runs two radix-2
; butterfly stages on them, multiplies x1 and x3 by the
; (+/-1 +/- i)/sqrt(2) twiddles needed for the third stage, and pushes the
; eight result registers on the stack (FS_RAD8 pops them pair by pair).
;   dinc      - byte stride between successive loads (bit-reversed path)
;   dinc_lsr1 - operand added to the start address on the bit-reversed
;               path; FS_RAD8 passes "dinc, LSR #1"
;   bitrev    - element index added to inptr on the bit-reversed path
; datalog, prescale, qname and reversed come from the surrounding assembly
; context.
; NOTE(review): SETREG, LOADDATAI, LOADCOEFR, SCALE, ADDi and SUBi are
; defined outside this chunk (presumably fft_mac.h) - confirm their exact
; semantics there.
MACRO
FIRST_STAGE_RADIX8_ODD $dinc, $dinc_lsr1, $bitrev
IF reversed
; load non bit reversed
; start four elements past inptr and load four elements with a
; one-element step
ADD t0, inptr, #4<<$datalog
LOADDATAI t0, #1<<$datalog, x0r, x0i
LOADDATAI t0, #1<<$datalog, x1r, x1i
LOADDATAI t0, #1<<$datalog, x2r, x2i
LOADDATAI t0, #1<<$datalog, x3r, x3i
ELSE
; load data elements 1,3,5,7 into register order 1,5,3,7
; t0 = inptr + (bitrev << datalog) + dinc_lsr1
ADD t0, inptr, $bitrev, LSL #$datalog
ADD t0, t0, $dinc_lsr1 ; load in odd terms first
; note the destination order x0, x2, x1, x3
LOADDATAI t0, $dinc, x0r, x0i
LOADDATAI t0, $dinc, x2r, x2i
LOADDATAI t0, $dinc, x1r, x1i
LOADDATAI t0, $dinc, x3r, x3i
ENDIF
IF "$prescale"="P"
; t0 is reused for the left-shift amount read from the stack
; (presumably the r3 slot pushed by FS_RAD8 - TODO confirm)
LDR t0, [sp, #8]
MOV x0r, x0r, LSL t0
MOV x0i, x0i, LSL t0
MOV x1r, x1r, LSL t0
MOV x1i, x1i, LSL t0
MOV x2r, x2r, LSL t0
MOV x2i, x2i, LSL t0
MOV x3r, x3r, LSL t0
MOV x3i, x3i, LSL t0
ENDIF
; h2 shares registers with x3 and h3 uses t0/t1, so the order of the
; instructions below matters
SETREG h2, x3r, x3i
SETREG h3, t0, t1
; first stage (radix 2) butterflies
; x0 becomes x0 + x1; x1 = (new x0) - 2*x1, i.e. the original x0 - x1
ADD x0r, x0r, x1r
ADD x0i, x0i, x1i
SUB x1r, x0r, x1r, LSL #1
SUB x1i, x0i, x1i, LSL #1
; h3 = x2 - x3 is taken first because writing h2 overwrites x3
SUB $h3r, x2r, x3r
SUB $h3i, x2i, x3i
ADD $h2r, x2r, x3r
ADD $h2i, x2i, x3i
; second stage (radix 2) butterflies
SUB x2i, x0r, $h2r ; swap real and imag here
SUB x2r, x0i, $h2i ; for use later
ADD x0r, x0r, $h2r
ADD x0i, x0i, $h2i
; h2 is dead from here on, so its registers are reused for the x3 result
ADDi x3r, x1r, $h3i
SUBi x3i, x1i, $h3r
SUBi x1r, x1r, $h3i
ADDi x1i, x1i, $h3r
; do the 1/sqrt(2) (+/-1 +/- i) twiddles for third stage
; build the symbol name t_<qname>R_rad8 via a local string variable
LCLS tempname
tempname SETS "R_rad8"
IMPORT t_$qname$tempname
LDR t1, =t_$qname$tempname
; IMPORT t_$qname.R_rad8
; LDR t1, =t_$qname.R_rad8
; t1 changes from the table address to the coefficient read through it
LOADCOEFR t1, t1
; dinc is passed to SCALE as its last argument, so it is saved and
; restored around the four SCALE calls (presumably SCALE uses it as
; scratch - TODO confirm)
STMFD sp!, {dinc} ;;; FIXME!!!
SUB t0, x1r, x1i ; real part when * (1+i)
SCALE x1r, t0, t1, dinc ; scale by 1/sqrt(2)
; t0 + 2*x1i turns (x1r - x1i) into (x1r + x1i); x1r was already replaced
ADD t0, t0, x1i, LSL #1 ; imag part when * (1+i)
SCALE x1i, t0, t1, dinc ; scale by 1/sqrt(2)
SUB t0, x3r, x3i ; imag part when * (-1+i)
SCALE x3i, t0, t1, dinc ; scale by 1/sqrt(2)
; t0 - 2*x3r turns (x3r - x3i) into (-x3r - x3i)
SUB t0, t0, x3r, LSL #1 ; real part when * (-1+i)
SCALE x3r, t0, t1, dinc ; scale by 1/sqrt(2)
LDMFD sp!, {dinc} ;;; FIXME!!!
; hand the eight results to FS_RAD8 via the stack
STMFD sp!, {x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i}
MEND
; FIRST_STAGE_RADIX8_EVEN dinc, bitrev
;
; Loads four complex input elements into the h0..h3 register pairs and
; runs two radix-2 butterfly stages on them, leaving the results in the
; h0..h3 register pairs, where FS_RAD8 combines them with the values that
; FIRST_STAGE_RADIX8_ODD pushed on the stack.
;   dinc   - byte stride between successive loads on the bit-reversed path
;   bitrev - element index added to inptr on the bit-reversed path
; datalog, prescale, postldshift, postldshift1 and reversed are not
; parameters of this macro; they come from the surrounding assembly context.
; NOTE(review): SETREGS, SETREG, LOADDATAI, SHIFTDATA, ADDi and SUBi are
; defined outside this chunk (presumably fft_mac.h) - confirm their exact
; semantics there.
MACRO
FIRST_STAGE_RADIX8_EVEN $dinc, $bitrev
; load elements 0,2,4,6 into register order 0,4,2,6
; presumably aliases h0..h3 onto (x1r,x1i) (x2r,x2i) (x3r,x3i) (t0,t1),
; following the argument order - TODO confirm against SETREGS
SETREGS h, x1r, x1i, x2r, x2i, x3r, x3i, t0, t1
; g3 is given the x0r/x0i pair; x0r is also used as scratch below before
; g3 is first written
SETREG g3, x0r, x0i
IF reversed
; load normally
; four loads through inptr, each with a step of one element
; (1<<datalog bytes), filling h0, h1, h2, h3 in that order
LOADDATAI inptr, #1<<$datalog, $h0r, $h0i
LOADDATAI inptr, #1<<$datalog, $h1r, $h1i
LOADDATAI inptr, #1<<$datalog, $h2r, $h2i
LOADDATAI inptr, #1<<$datalog, $h3r, $h3i
; move inptr on by a further four elements (the ODD macro reads its four
; inputs starting at inptr + 4 elements)
ADD inptr, inptr, #4<<$datalog
ELSE
; load bit reversed
; x0r is a scratch pointer: inptr + (bitrev << datalog)
ADD x0r, inptr, $bitrev, LSL #$datalog
; note the destination order h0, h2, h1, h3
LOADDATAI x0r, $dinc, $h0r, $h0i
LOADDATAI x0r, $dinc, $h2r, $h2i
LOADDATAI x0r, $dinc, $h1r, $h1i
LOADDATAI x0r, $dinc, $h3r, $h3i
ENDIF
IF "$prescale"="P"
; x0r is reused as scratch for the left-shift amount read from the stack;
; the offset is 32 bytes larger than in the ODD macro because of the
; eight registers that macro pushed
LDR x0r, [sp, #8+32] ; NB we've stacked 8 extra regs!
MOV $h0r, $h0r, LSL x0r
MOV $h0i, $h0i, LSL x0r
MOV $h1r, $h1r, LSL x0r
MOV $h1i, $h1i, LSL x0r
MOV $h2r, $h2r, LSL x0r
MOV $h2i, $h2i, LSL x0r
MOV $h3r, $h3r, LSL x0r
MOV $h3i, $h3i, LSL x0r
ENDIF
SHIFTDATA $h0r, $h0i
; first stage (radix 2) butterflies
; h0 is overwritten with the sum first; h1 is then formed from the NEW h0
; minus h1 with the postldshift1 operand form, whose shift count FS_RAD8
; sets to one less than postldshift
ADD $h0r, $h0r, $h1r $postldshift
ADD $h0i, $h0i, $h1i $postldshift
SUB $h1r, $h0r, $h1r $postldshift1
SUB $h1i, $h0i, $h1i $postldshift1
; g3 = h2 - h3 must be taken before h2 is overwritten with h2 + h3
SUB $g3r, $h2r, $h3r
SUB $g3i, $h2i, $h3i
ADD $h2r, $h2r, $h3r
ADD $h2i, $h2i, $h3i
; second stage (radix 2) butterflies
; same sum-then-difference pattern on h0/h2
ADD $h0r, $h0r, $h2r $postldshift
ADD $h0i, $h0i, $h2i $postldshift
SUB $h2r, $h0r, $h2r $postldshift1
SUB $h2i, $h0i, $h2i $postldshift1
; h3 and h1 combine h1 with g3 with real/imag parts crossed (g3i into the
; real results, g3r into the imaginary ones); h3 is written first because
; it still needs the old h1
ADDi $h3r, $h1r, $g3i $postldshift
SUBi $h3i, $h1i, $g3r $postldshift
SUBi $h1r, $h1r, $g3i $postldshift
ADDi $h1i, $h1i, $g3r $postldshift
MEND
END

View File

@ -1,111 +0,0 @@
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: gs_rad4.h,v
; Revision: 1.8
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; This file contains the general stage, radix 4 macro
; GS_RAD4 - general (non-first) radix-4 stage.
;
; Walks every block of the current stage. For each butterfly it applies
; three twiddle factors read through cptr, combines the four values with
; two layers of add/subtract, and stores the four results through dptr.
; Register usage visible in this macro:
;   dinc  - byte distance between the four inputs of one butterfly;
;           multiplied by 4 on exit
;   count - number of blocks; divided by 4 on exit
;   cptr  - coefficient pointer; moved on to the next stage on exit
;   dptr  - data pointer; restored on exit
; datalog, norm, qshift, architecture and cdshift come from the surrounding
; assembly context.
; NOTE(review): SETSHIFT, SETREGS, SETREG, LOADCOEFS, LOADDATA, LOADDATAZ,
; SHIFTDATA, TWIDDLE, TWIDDLE_E, STORE, STOREP, ADDi and SUBi are defined
; outside this chunk (presumably fft_mac.h) - confirm their semantics there.
MACRO
GS_RAD4
SETSHIFT postldshift, 2*norm
SETSHIFT postmulshift, 2*norm+qshift
; dinc contains the number of bytes between the values to read
; for the radix 4 butterfly
; Thus:
; dinc*4 = number of bytes between the blocks at this level
; dinc>>datalog = number of elements in each block at this level
MOV count, count, LSR #2 ; a quarter the blocks per stage
; the already-quartered count is what gets restored at the end
STMFD sp!, {dptr, count}
ADD t0, dinc, dinc, LSL #1 ; 3*dinc
ADD dptr, dptr, t0 ; move to last of 4 butterflies
; count is used as two packed 16-bit counters: the low half counts blocks,
; the top half counts butterflies within the current block
SUB count, count, #1<<16 ; prepare top half of counter
12 ; block loop
; set top half of counter to (elements/block - 1)
ADD count, count, dinc, LSL #(16-$datalog)
15 ; butterfly loop
IF (architecture>=5):LAND:(qshift<16)
; E extensions available (21 cycles)
; But needs a different table format
; three coefficient words are loaded at once and three data words are
; read walking backwards through the butterfly in steps of dinc
LDMIA cptr!, {x0i, x1i, x2i}
LDR x2r, [dptr], -dinc
LDR x1r, [dptr], -dinc
LDR x0r, [dptr], -dinc
TWIDDLE_E x3r, x3i, x2i, t0, x2r
TWIDDLE_E x2r, x2i, x1i, t0, x1r
TWIDDLE_E x1r, x1i, x0i, t0, x0r
ELSE
; load next three twiddle factors (66 @ 4 cycles/mul)
LOADCOEFS cptr, x1r, x1i, x2r, x2i, x3r, x3i
; load data in reversed order & perform twiddles
; x0 is the load target each time; the twiddle pairs are used in the
; order x3, x2, x1
LOADDATA dptr, -dinc, x0r, x0i
TWIDDLE x0r, x0i, x3r, x3i, t0, t1
LOADDATA dptr, -dinc, x0r, x0i
TWIDDLE x0r, x0i, x2r, x2i, t0, t1
LOADDATA dptr, -dinc, x0r, x0i
TWIDDLE x0r, x0i, x1r, x1i, t0, t1
ENDIF
; the remaining input is loaded without a twiddle
LOADDATAZ dptr, x0r, x0i
SHIFTDATA x0r, x0i
; now calculate the h's
; h[0,k] = g[0,k] + g[2,k]
; h[1,k] = g[0,k] - g[2,k]
; h[2,k] = g[1,k] + g[3,k]
; h[3,k] = g[1,k] - g[3,k]
; the h names reuse t0/t1 and the x registers, so each group below writes
; its sum before the difference that overwrites one of its inputs
SETREGS h,t0,t1,x0r,x0i,x1r,x1i,x2r,x2i
ADD $h0r, x0r, x1r $postmulshift
ADD $h0i, x0i, x1i $postmulshift
SUB $h1r, x0r, x1r $postmulshift
SUB $h1i, x0i, x1i $postmulshift
ADD $h2r, x2r, x3r
ADD $h2i, x2i, x3i
SUB $h3r, x2r, x3r
SUB $h3i, x2i, x3i
; now calculate the y's and store results
; y[0*N/4+k] = h[0,k] + h[2,k]
; y[1*N/4+k] = h[1,k] + j*h[3,k]
; y[2*N/4+k] = h[0,k] - h[2,k]
; y[3*N/4+k] = h[1,k] - j*h[3,k]
; one register pair (x3r,x3i) is reused for each output in turn
SETREG y0,x3r,x3i
ADD $y0r, $h0r, $h2r $postmulshift
ADD $y0i, $h0i, $h2i $postmulshift
STORE dptr, dinc, $y0r, $y0i
SUBi $y0r, $h1r, $h3i $postmulshift
ADDi $y0i, $h1i, $h3r $postmulshift
STORE dptr, dinc, $y0r, $y0i
SUB $y0r, $h0r, $h2r $postmulshift
SUB $y0i, $h0i, $h2i $postmulshift
STORE dptr, dinc, $y0r, $y0i
ADDi $y0r, $h1r, $h3i $postmulshift
SUBi $y0i, $h1i, $h3r $postmulshift
STOREP dptr, $y0r, $y0i
; continue butterfly loop
; the top-half counter runs from (elements/block - 1) down through 0
SUBS count, count, #1<<16
BGE %BT15
; decrement counts for block loop
ADD t0, dinc, dinc, LSL #1 ; dinc * 3
ADD dptr, dptr, t0 ; move onto next block
SUB cptr, cptr, t0 $cdshift ; move back to coefficients start
SUB count, count, #1 ; done one more block
; shifting left by 16 discards the top half, so Z reflects only the
; block counter in the low half
MOVS t1, count, LSL #16
BNE %BT12 ; still more blocks to do
; finished stage
ADD cptr, cptr, t0 $cdshift ; move onto next stage coefficients
LDMFD sp!, {dptr, count}
MOV dinc, dinc, LSL #2 ; four times the entries per block
MEND
END

View File

@ -1,91 +0,0 @@
# $Copyright:
# ----------------------------------------------------------------
# This confidential and proprietary software may be used only as
# authorised by a licensing agreement from ARM Limited
# (C) COPYRIGHT 2000,2002 ARM Limited
# ALL RIGHTS RESERVED
# The entire notice above must be reproduced on all authorised
# copies and copies may only be made to the extent permitted
# by a licensing agreement from ARM Limited.
# ----------------------------------------------------------------
# File: readme.txt,v
# Revision: 1.4
# ----------------------------------------------------------------
# $
!!! To fully understand the FFT/ARM9E/WIN_MOB implementation in SPLIB,
!!! you have to refer to the full set of files in RVDS' package:
!!! C:\Program Files\ARM\RVDS\Examples\3.0\79\windows\fft_v5te.
ARM Assembler FFT implementation
================================
Overview
========
This implementation has been restructured to allow FFTs of varying radix
rather than the fixed radix-2 or radix-4 versions allowed earlier. The
implementation of an optimised assembler FFT of a given size (N points)
consists of chaining together a sequence of stages 1,2,3,...,k such that the
j'th stage has radix Rj and:
N = R1*R2*R3*...*Rk
For the ARM implementations we keep the size of the Rj's decreasing with
increasing j, EXCEPT that if there are any non power of 2 factors (ie, odd
prime factors) then these come before all the power of 2 factors.
For example:
N=64 would be implemented as stages:
radix 4, radix 4, radix 4
N=128 would be implemented as stages:
radix 8, radix 4, radix 4
OR
radix 4, radix 4, radix 4, radix 2
N=192 would be implemented as stages:
radix 3, radix 4, radix 4, radix 4
The bit reversal is usually combined with the first stage where possible.
Structure
=========
The actual FFT routine is built out of a hierarchy of macros. All stage
macros and filenames are one of:
fs_rad<n> => the macro implements a radix <n> First Stage (usually
including the bit reversal)
gs_rad<n> => the macro implements a radix <n> General Stage (any
stage except the first - includes the twiddle operations)
ls_rad<n> => the macro implements a radix <n> Last Stage (this macro
is like the gs_rad<n> version but is optimised for
efficiency in the last stage)
ls_ztor => this macro converts the output of a complex FFT to
be the first half of the output of a real FFT of
double the number of input points.
Other files are:
fft_mac.h => Macros and register definitions shared by all radix
implementations
fft_main.h => Main FFT macros drawing together the stage macros
to produce a complete FFT
Interfaces
==========
The register interfaces for the different types of stage macros are described
at the start of fft_mac.h

View File

@ -1,695 +0,0 @@
/*
* Copyright (C) ARM Limited 1998-2002. All rights reserved.
*
* t_01024_8.c
*
*/
/*
 * Size constant published alongside the t_Q14S_8 coefficient table.
 * NOTE(review): presumably the FFT length in points that the table was
 * generated for (the file is named t_01024_8.c) - confirm with the users
 * of this symbol.
 */
extern const int s_Q14S_8;
const int s_Q14S_8 = 1 << 10; /* 1024 */
/*
 * t_Q14S_8: 2032 16-bit coefficient words for the ARM assembler FFT.
 *
 * NOTE(review): the interpretation below is inferred from the values and
 * the symbol name only; confirm against the table generator before relying
 * on it:
 *  - words look like Q14 fixed point (0x4000 == 1.0); words >= 0x8000 are
 *    presumably negative two's-complement values;
 *  - words come in pairs that look like (cos(a) - sin(a), sin(a)), e.g.
 *    0x22a3,0x187e ~= (0.5412, 0.3827) for a = pi/8.
 */
extern const unsigned short t_Q14S_8[2032];
const unsigned short t_Q14S_8[2032] = {
0x4000,0x0000 ,0x4000,0x0000 ,0x4000,0x0000 ,
0x22a3,0x187e ,0x3249,0x0c7c ,0x11a8,0x238e ,
0x0000,0x2d41 ,0x22a3,0x187e ,0xdd5d,0x3b21 ,
0xdd5d,0x3b21 ,0x11a8,0x238e ,0xb4be,0x3ec5 ,
0xc000,0x4000 ,0x0000,0x2d41 ,0xa57e,0x2d41 ,
0xac61,0x3b21 ,0xee58,0x3537 ,0xb4be,0x0c7c ,
0xa57e,0x2d41 ,0xdd5d,0x3b21 ,0xdd5d,0xe782 ,
0xac61,0x187e ,0xcdb7,0x3ec5 ,0x11a8,0xcac9 ,
0x4000,0x0000 ,0x4000,0x0000 ,0x4000,0x0000 ,
0x396b,0x0646 ,0x3cc8,0x0324 ,0x35eb,0x0964 ,
0x3249,0x0c7c ,0x396b,0x0646 ,0x2aaa,0x1294 ,
0x2aaa,0x1294 ,0x35eb,0x0964 ,0x1e7e,0x1b5d ,
0x22a3,0x187e ,0x3249,0x0c7c ,0x11a8,0x238e ,
0x1a46,0x1e2b ,0x2e88,0x0f8d ,0x0471,0x2afb ,
0x11a8,0x238e ,0x2aaa,0x1294 ,0xf721,0x3179 ,
0x08df,0x289a ,0x26b3,0x1590 ,0xea02,0x36e5 ,
0x0000,0x2d41 ,0x22a3,0x187e ,0xdd5d,0x3b21 ,
0xf721,0x3179 ,0x1e7e,0x1b5d ,0xd178,0x3e15 ,
0xee58,0x3537 ,0x1a46,0x1e2b ,0xc695,0x3fb1 ,
0xe5ba,0x3871 ,0x15fe,0x20e7 ,0xbcf0,0x3fec ,
0xdd5d,0x3b21 ,0x11a8,0x238e ,0xb4be,0x3ec5 ,
0xd556,0x3d3f ,0x0d48,0x2620 ,0xae2e,0x3c42 ,
0xcdb7,0x3ec5 ,0x08df,0x289a ,0xa963,0x3871 ,
0xc695,0x3fb1 ,0x0471,0x2afb ,0xa678,0x3368 ,
0xc000,0x4000 ,0x0000,0x2d41 ,0xa57e,0x2d41 ,
0xba09,0x3fb1 ,0xfb8f,0x2f6c ,0xa678,0x2620 ,
0xb4be,0x3ec5 ,0xf721,0x3179 ,0xa963,0x1e2b ,
0xb02d,0x3d3f ,0xf2b8,0x3368 ,0xae2e,0x1590 ,
0xac61,0x3b21 ,0xee58,0x3537 ,0xb4be,0x0c7c ,
0xa963,0x3871 ,0xea02,0x36e5 ,0xbcf0,0x0324 ,
0xa73b,0x3537 ,0xe5ba,0x3871 ,0xc695,0xf9ba ,
0xa5ed,0x3179 ,0xe182,0x39db ,0xd178,0xf073 ,
0xa57e,0x2d41 ,0xdd5d,0x3b21 ,0xdd5d,0xe782 ,
0xa5ed,0x289a ,0xd94d,0x3c42 ,0xea02,0xdf19 ,
0xa73b,0x238e ,0xd556,0x3d3f ,0xf721,0xd766 ,
0xa963,0x1e2b ,0xd178,0x3e15 ,0x0471,0xd094 ,
0xac61,0x187e ,0xcdb7,0x3ec5 ,0x11a8,0xcac9 ,
0xb02d,0x1294 ,0xca15,0x3f4f ,0x1e7e,0xc625 ,
0xb4be,0x0c7c ,0xc695,0x3fb1 ,0x2aaa,0xc2c1 ,
0xba09,0x0646 ,0xc338,0x3fec ,0x35eb,0xc0b1 ,
0x4000,0x0000 ,0x4000,0x0000 ,0x4000,0x0000 ,
0x3e69,0x0192 ,0x3f36,0x00c9 ,0x3d9a,0x025b ,
0x3cc8,0x0324 ,0x3e69,0x0192 ,0x3b1e,0x04b5 ,
0x3b1e,0x04b5 ,0x3d9a,0x025b ,0x388e,0x070e ,
0x396b,0x0646 ,0x3cc8,0x0324 ,0x35eb,0x0964 ,
0x37af,0x07d6 ,0x3bf4,0x03ed ,0x3334,0x0bb7 ,
0x35eb,0x0964 ,0x3b1e,0x04b5 ,0x306c,0x0e06 ,
0x341e,0x0af1 ,0x3a46,0x057e ,0x2d93,0x1050 ,
0x3249,0x0c7c ,0x396b,0x0646 ,0x2aaa,0x1294 ,
0x306c,0x0e06 ,0x388e,0x070e ,0x27b3,0x14d2 ,
0x2e88,0x0f8d ,0x37af,0x07d6 ,0x24ae,0x1709 ,
0x2c9d,0x1112 ,0x36ce,0x089d ,0x219c,0x1937 ,
0x2aaa,0x1294 ,0x35eb,0x0964 ,0x1e7e,0x1b5d ,
0x28b2,0x1413 ,0x3505,0x0a2b ,0x1b56,0x1d79 ,
0x26b3,0x1590 ,0x341e,0x0af1 ,0x1824,0x1f8c ,
0x24ae,0x1709 ,0x3334,0x0bb7 ,0x14ea,0x2193 ,
0x22a3,0x187e ,0x3249,0x0c7c ,0x11a8,0x238e ,
0x2093,0x19ef ,0x315b,0x0d41 ,0x0e61,0x257e ,
0x1e7e,0x1b5d ,0x306c,0x0e06 ,0x0b14,0x2760 ,
0x1c64,0x1cc6 ,0x2f7b,0x0eca ,0x07c4,0x2935 ,
0x1a46,0x1e2b ,0x2e88,0x0f8d ,0x0471,0x2afb ,
0x1824,0x1f8c ,0x2d93,0x1050 ,0x011c,0x2cb2 ,
0x15fe,0x20e7 ,0x2c9d,0x1112 ,0xfdc7,0x2e5a ,
0x13d5,0x223d ,0x2ba4,0x11d3 ,0xfa73,0x2ff2 ,
0x11a8,0x238e ,0x2aaa,0x1294 ,0xf721,0x3179 ,
0x0f79,0x24da ,0x29af,0x1354 ,0xf3d2,0x32ef ,
0x0d48,0x2620 ,0x28b2,0x1413 ,0xf087,0x3453 ,
0x0b14,0x2760 ,0x27b3,0x14d2 ,0xed41,0x35a5 ,
0x08df,0x289a ,0x26b3,0x1590 ,0xea02,0x36e5 ,
0x06a9,0x29ce ,0x25b1,0x164c ,0xe6cb,0x3812 ,
0x0471,0x2afb ,0x24ae,0x1709 ,0xe39c,0x392b ,
0x0239,0x2c21 ,0x23a9,0x17c4 ,0xe077,0x3a30 ,
0x0000,0x2d41 ,0x22a3,0x187e ,0xdd5d,0x3b21 ,
0xfdc7,0x2e5a ,0x219c,0x1937 ,0xda4f,0x3bfd ,
0xfb8f,0x2f6c ,0x2093,0x19ef ,0xd74e,0x3cc5 ,
0xf957,0x3076 ,0x1f89,0x1aa7 ,0xd45c,0x3d78 ,
0xf721,0x3179 ,0x1e7e,0x1b5d ,0xd178,0x3e15 ,
0xf4ec,0x3274 ,0x1d72,0x1c12 ,0xcea5,0x3e9d ,
0xf2b8,0x3368 ,0x1c64,0x1cc6 ,0xcbe2,0x3f0f ,
0xf087,0x3453 ,0x1b56,0x1d79 ,0xc932,0x3f6b ,
0xee58,0x3537 ,0x1a46,0x1e2b ,0xc695,0x3fb1 ,
0xec2b,0x3612 ,0x1935,0x1edc ,0xc40c,0x3fe1 ,
0xea02,0x36e5 ,0x1824,0x1f8c ,0xc197,0x3ffb ,
0xe7dc,0x37b0 ,0x1711,0x203a ,0xbf38,0x3fff ,
0xe5ba,0x3871 ,0x15fe,0x20e7 ,0xbcf0,0x3fec ,
0xe39c,0x392b ,0x14ea,0x2193 ,0xbabf,0x3fc4 ,
0xe182,0x39db ,0x13d5,0x223d ,0xb8a6,0x3f85 ,
0xdf6d,0x3a82 ,0x12bf,0x22e7 ,0xb6a5,0x3f30 ,
0xdd5d,0x3b21 ,0x11a8,0x238e ,0xb4be,0x3ec5 ,
0xdb52,0x3bb6 ,0x1091,0x2435 ,0xb2f2,0x3e45 ,
0xd94d,0x3c42 ,0x0f79,0x24da ,0xb140,0x3daf ,
0xd74e,0x3cc5 ,0x0e61,0x257e ,0xafa9,0x3d03 ,
0xd556,0x3d3f ,0x0d48,0x2620 ,0xae2e,0x3c42 ,
0xd363,0x3daf ,0x0c2e,0x26c1 ,0xacd0,0x3b6d ,
0xd178,0x3e15 ,0x0b14,0x2760 ,0xab8e,0x3a82 ,
0xcf94,0x3e72 ,0x09fa,0x27fe ,0xaa6a,0x3984 ,
0xcdb7,0x3ec5 ,0x08df,0x289a ,0xa963,0x3871 ,
0xcbe2,0x3f0f ,0x07c4,0x2935 ,0xa87b,0x374b ,
0xca15,0x3f4f ,0x06a9,0x29ce ,0xa7b1,0x3612 ,
0xc851,0x3f85 ,0x058d,0x2a65 ,0xa705,0x34c6 ,
0xc695,0x3fb1 ,0x0471,0x2afb ,0xa678,0x3368 ,
0xc4e2,0x3fd4 ,0x0355,0x2b8f ,0xa60b,0x31f8 ,
0xc338,0x3fec ,0x0239,0x2c21 ,0xa5bc,0x3076 ,
0xc197,0x3ffb ,0x011c,0x2cb2 ,0xa58d,0x2ee4 ,
0xc000,0x4000 ,0x0000,0x2d41 ,0xa57e,0x2d41 ,
0xbe73,0x3ffb ,0xfee4,0x2dcf ,0xa58d,0x2b8f ,
0xbcf0,0x3fec ,0xfdc7,0x2e5a ,0xa5bc,0x29ce ,
0xbb77,0x3fd4 ,0xfcab,0x2ee4 ,0xa60b,0x27fe ,
0xba09,0x3fb1 ,0xfb8f,0x2f6c ,0xa678,0x2620 ,
0xb8a6,0x3f85 ,0xfa73,0x2ff2 ,0xa705,0x2435 ,
0xb74d,0x3f4f ,0xf957,0x3076 ,0xa7b1,0x223d ,
0xb600,0x3f0f ,0xf83c,0x30f9 ,0xa87b,0x203a ,
0xb4be,0x3ec5 ,0xf721,0x3179 ,0xa963,0x1e2b ,
0xb388,0x3e72 ,0xf606,0x31f8 ,0xaa6a,0x1c12 ,
0xb25e,0x3e15 ,0xf4ec,0x3274 ,0xab8e,0x19ef ,
0xb140,0x3daf ,0xf3d2,0x32ef ,0xacd0,0x17c4 ,
0xb02d,0x3d3f ,0xf2b8,0x3368 ,0xae2e,0x1590 ,
0xaf28,0x3cc5 ,0xf19f,0x33df ,0xafa9,0x1354 ,
0xae2e,0x3c42 ,0xf087,0x3453 ,0xb140,0x1112 ,
0xad41,0x3bb6 ,0xef6f,0x34c6 ,0xb2f2,0x0eca ,
0xac61,0x3b21 ,0xee58,0x3537 ,0xb4be,0x0c7c ,
0xab8e,0x3a82 ,0xed41,0x35a5 ,0xb6a5,0x0a2b ,
0xaac8,0x39db ,0xec2b,0x3612 ,0xb8a6,0x07d6 ,
0xaa0f,0x392b ,0xeb16,0x367d ,0xbabf,0x057e ,
0xa963,0x3871 ,0xea02,0x36e5 ,0xbcf0,0x0324 ,
0xa8c5,0x37b0 ,0xe8ef,0x374b ,0xbf38,0x00c9 ,
0xa834,0x36e5 ,0xe7dc,0x37b0 ,0xc197,0xfe6e ,
0xa7b1,0x3612 ,0xe6cb,0x3812 ,0xc40c,0xfc13 ,
0xa73b,0x3537 ,0xe5ba,0x3871 ,0xc695,0xf9ba ,
0xa6d3,0x3453 ,0xe4aa,0x38cf ,0xc932,0xf763 ,
0xa678,0x3368 ,0xe39c,0x392b ,0xcbe2,0xf50f ,
0xa62c,0x3274 ,0xe28e,0x3984 ,0xcea5,0xf2bf ,
0xa5ed,0x3179 ,0xe182,0x39db ,0xd178,0xf073 ,
0xa5bc,0x3076 ,0xe077,0x3a30 ,0xd45c,0xee2d ,
0xa599,0x2f6c ,0xdf6d,0x3a82 ,0xd74e,0xebed ,
0xa585,0x2e5a ,0xde64,0x3ad3 ,0xda4f,0xe9b4 ,
0xa57e,0x2d41 ,0xdd5d,0x3b21 ,0xdd5d,0xe782 ,
0xa585,0x2c21 ,0xdc57,0x3b6d ,0xe077,0xe559 ,
0xa599,0x2afb ,0xdb52,0x3bb6 ,0xe39c,0xe33a ,
0xa5bc,0x29ce ,0xda4f,0x3bfd ,0xe6cb,0xe124 ,
0xa5ed,0x289a ,0xd94d,0x3c42 ,0xea02,0xdf19 ,
0xa62c,0x2760 ,0xd84d,0x3c85 ,0xed41,0xdd19 ,
0xa678,0x2620 ,0xd74e,0x3cc5 ,0xf087,0xdb26 ,
0xa6d3,0x24da ,0xd651,0x3d03 ,0xf3d2,0xd93f ,
0xa73b,0x238e ,0xd556,0x3d3f ,0xf721,0xd766 ,
0xa7b1,0x223d ,0xd45c,0x3d78 ,0xfa73,0xd59b ,
0xa834,0x20e7 ,0xd363,0x3daf ,0xfdc7,0xd3df ,
0xa8c5,0x1f8c ,0xd26d,0x3de3 ,0x011c,0xd231 ,
0xa963,0x1e2b ,0xd178,0x3e15 ,0x0471,0xd094 ,
0xaa0f,0x1cc6 ,0xd085,0x3e45 ,0x07c4,0xcf07 ,
0xaac8,0x1b5d ,0xcf94,0x3e72 ,0x0b14,0xcd8c ,
0xab8e,0x19ef ,0xcea5,0x3e9d ,0x0e61,0xcc21 ,
0xac61,0x187e ,0xcdb7,0x3ec5 ,0x11a8,0xcac9 ,
0xad41,0x1709 ,0xcccc,0x3eeb ,0x14ea,0xc983 ,
0xae2e,0x1590 ,0xcbe2,0x3f0f ,0x1824,0xc850 ,
0xaf28,0x1413 ,0xcafb,0x3f30 ,0x1b56,0xc731 ,
0xb02d,0x1294 ,0xca15,0x3f4f ,0x1e7e,0xc625 ,
0xb140,0x1112 ,0xc932,0x3f6b ,0x219c,0xc52d ,
0xb25e,0x0f8d ,0xc851,0x3f85 ,0x24ae,0xc44a ,
0xb388,0x0e06 ,0xc772,0x3f9c ,0x27b3,0xc37b ,
0xb4be,0x0c7c ,0xc695,0x3fb1 ,0x2aaa,0xc2c1 ,
0xb600,0x0af1 ,0xc5ba,0x3fc4 ,0x2d93,0xc21d ,
0xb74d,0x0964 ,0xc4e2,0x3fd4 ,0x306c,0xc18e ,
0xb8a6,0x07d6 ,0xc40c,0x3fe1 ,0x3334,0xc115 ,
0xba09,0x0646 ,0xc338,0x3fec ,0x35eb,0xc0b1 ,
0xbb77,0x04b5 ,0xc266,0x3ff5 ,0x388e,0xc064 ,
0xbcf0,0x0324 ,0xc197,0x3ffb ,0x3b1e,0xc02c ,
0xbe73,0x0192 ,0xc0ca,0x3fff ,0x3d9a,0xc00b ,
0x4000,0x0000 ,0x3f9b,0x0065 ,0x3f36,0x00c9 ,
0x3ed0,0x012e ,0x3e69,0x0192 ,0x3e02,0x01f7 ,
0x3d9a,0x025b ,0x3d31,0x02c0 ,0x3cc8,0x0324 ,
0x3c5f,0x0388 ,0x3bf4,0x03ed ,0x3b8a,0x0451 ,
0x3b1e,0x04b5 ,0x3ab2,0x051a ,0x3a46,0x057e ,
0x39d9,0x05e2 ,0x396b,0x0646 ,0x38fd,0x06aa ,
0x388e,0x070e ,0x381f,0x0772 ,0x37af,0x07d6 ,
0x373f,0x0839 ,0x36ce,0x089d ,0x365d,0x0901 ,
0x35eb,0x0964 ,0x3578,0x09c7 ,0x3505,0x0a2b ,
0x3492,0x0a8e ,0x341e,0x0af1 ,0x33a9,0x0b54 ,
0x3334,0x0bb7 ,0x32bf,0x0c1a ,0x3249,0x0c7c ,
0x31d2,0x0cdf ,0x315b,0x0d41 ,0x30e4,0x0da4 ,
0x306c,0x0e06 ,0x2ff4,0x0e68 ,0x2f7b,0x0eca ,
0x2f02,0x0f2b ,0x2e88,0x0f8d ,0x2e0e,0x0fee ,
0x2d93,0x1050 ,0x2d18,0x10b1 ,0x2c9d,0x1112 ,
0x2c21,0x1173 ,0x2ba4,0x11d3 ,0x2b28,0x1234 ,
0x2aaa,0x1294 ,0x2a2d,0x12f4 ,0x29af,0x1354 ,
0x2931,0x13b4 ,0x28b2,0x1413 ,0x2833,0x1473 ,
0x27b3,0x14d2 ,0x2733,0x1531 ,0x26b3,0x1590 ,
0x2632,0x15ee ,0x25b1,0x164c ,0x252f,0x16ab ,
0x24ae,0x1709 ,0x242b,0x1766 ,0x23a9,0x17c4 ,
0x2326,0x1821 ,0x22a3,0x187e ,0x221f,0x18db ,
0x219c,0x1937 ,0x2117,0x1993 ,0x2093,0x19ef ,
0x200e,0x1a4b ,0x1f89,0x1aa7 ,0x1f04,0x1b02 ,
0x1e7e,0x1b5d ,0x1df8,0x1bb8 ,0x1d72,0x1c12 ,
0x1ceb,0x1c6c ,0x1c64,0x1cc6 ,0x1bdd,0x1d20 ,
0x1b56,0x1d79 ,0x1ace,0x1dd3 ,0x1a46,0x1e2b ,
0x19be,0x1e84 ,0x1935,0x1edc ,0x18ad,0x1f34 ,
0x1824,0x1f8c ,0x179b,0x1fe3 ,0x1711,0x203a ,
0x1688,0x2091 ,0x15fe,0x20e7 ,0x1574,0x213d ,
0x14ea,0x2193 ,0x145f,0x21e8 ,0x13d5,0x223d ,
0x134a,0x2292 ,0x12bf,0x22e7 ,0x1234,0x233b ,
0x11a8,0x238e ,0x111d,0x23e2 ,0x1091,0x2435 ,
0x1005,0x2488 ,0x0f79,0x24da ,0x0eed,0x252c ,
0x0e61,0x257e ,0x0dd4,0x25cf ,0x0d48,0x2620 ,
0x0cbb,0x2671 ,0x0c2e,0x26c1 ,0x0ba1,0x2711 ,
0x0b14,0x2760 ,0x0a87,0x27af ,0x09fa,0x27fe ,
0x096d,0x284c ,0x08df,0x289a ,0x0852,0x28e7 ,
0x07c4,0x2935 ,0x0736,0x2981 ,0x06a9,0x29ce ,
0x061b,0x2a1a ,0x058d,0x2a65 ,0x04ff,0x2ab0 ,
0x0471,0x2afb ,0x03e3,0x2b45 ,0x0355,0x2b8f ,
0x02c7,0x2bd8 ,0x0239,0x2c21 ,0x01aa,0x2c6a ,
0x011c,0x2cb2 ,0x008e,0x2cfa ,0x0000,0x2d41 ,
0xff72,0x2d88 ,0xfee4,0x2dcf ,0xfe56,0x2e15 ,
0xfdc7,0x2e5a ,0xfd39,0x2e9f ,0xfcab,0x2ee4 ,
0xfc1d,0x2f28 ,0xfb8f,0x2f6c ,0xfb01,0x2faf ,
0xfa73,0x2ff2 ,0xf9e5,0x3034 ,0xf957,0x3076 ,
0xf8ca,0x30b8 ,0xf83c,0x30f9 ,0xf7ae,0x3139 ,
0xf721,0x3179 ,0xf693,0x31b9 ,0xf606,0x31f8 ,
0xf579,0x3236 ,0xf4ec,0x3274 ,0xf45f,0x32b2 ,
0xf3d2,0x32ef ,0xf345,0x332c ,0xf2b8,0x3368 ,
0xf22c,0x33a3 ,0xf19f,0x33df ,0xf113,0x3419 ,
0xf087,0x3453 ,0xeffb,0x348d ,0xef6f,0x34c6 ,
0xeee3,0x34ff ,0xee58,0x3537 ,0xedcc,0x356e ,
0xed41,0x35a5 ,0xecb6,0x35dc ,0xec2b,0x3612 ,
0xeba1,0x3648 ,0xeb16,0x367d ,0xea8c,0x36b1 ,
0xea02,0x36e5 ,0xe978,0x3718 ,0xe8ef,0x374b ,
0xe865,0x377e ,0xe7dc,0x37b0 ,0xe753,0x37e1 ,
0xe6cb,0x3812 ,0xe642,0x3842 ,0xe5ba,0x3871 ,
0xe532,0x38a1 ,0xe4aa,0x38cf ,0xe423,0x38fd ,
0xe39c,0x392b ,0xe315,0x3958 ,0xe28e,0x3984 ,
0xe208,0x39b0 ,0xe182,0x39db ,0xe0fc,0x3a06 ,
0xe077,0x3a30 ,0xdff2,0x3a59 ,0xdf6d,0x3a82 ,
0xdee9,0x3aab ,0xde64,0x3ad3 ,0xdde1,0x3afa ,
0xdd5d,0x3b21 ,0xdcda,0x3b47 ,0xdc57,0x3b6d ,
0xdbd5,0x3b92 ,0xdb52,0x3bb6 ,0xdad1,0x3bda ,
0xda4f,0x3bfd ,0xd9ce,0x3c20 ,0xd94d,0x3c42 ,
0xd8cd,0x3c64 ,0xd84d,0x3c85 ,0xd7cd,0x3ca5 ,
0xd74e,0x3cc5 ,0xd6cf,0x3ce4 ,0xd651,0x3d03 ,
0xd5d3,0x3d21 ,0xd556,0x3d3f ,0xd4d8,0x3d5b ,
0xd45c,0x3d78 ,0xd3df,0x3d93 ,0xd363,0x3daf ,
0xd2e8,0x3dc9 ,0xd26d,0x3de3 ,0xd1f2,0x3dfc ,
0xd178,0x3e15 ,0xd0fe,0x3e2d ,0xd085,0x3e45 ,
0xd00c,0x3e5c ,0xcf94,0x3e72 ,0xcf1c,0x3e88 ,
0xcea5,0x3e9d ,0xce2e,0x3eb1 ,0xcdb7,0x3ec5 ,
0xcd41,0x3ed8 ,0xcccc,0x3eeb ,0xcc57,0x3efd ,
0xcbe2,0x3f0f ,0xcb6e,0x3f20 ,0xcafb,0x3f30 ,
0xca88,0x3f40 ,0xca15,0x3f4f ,0xc9a3,0x3f5d ,
0xc932,0x3f6b ,0xc8c1,0x3f78 ,0xc851,0x3f85 ,
0xc7e1,0x3f91 ,0xc772,0x3f9c ,0xc703,0x3fa7 ,
0xc695,0x3fb1 ,0xc627,0x3fbb ,0xc5ba,0x3fc4 ,
0xc54e,0x3fcc ,0xc4e2,0x3fd4 ,0xc476,0x3fdb ,
0xc40c,0x3fe1 ,0xc3a1,0x3fe7 ,0xc338,0x3fec ,
0xc2cf,0x3ff1 ,0xc266,0x3ff5 ,0xc1fe,0x3ff8 ,
0xc197,0x3ffb ,0xc130,0x3ffd ,0xc0ca,0x3fff ,
0xc065,0x4000 ,0xc000,0x4000 ,0xbf9c,0x4000 ,
0xbf38,0x3fff ,0xbed5,0x3ffd ,0xbe73,0x3ffb ,
0xbe11,0x3ff8 ,0xbdb0,0x3ff5 ,0xbd50,0x3ff1 ,
0xbcf0,0x3fec ,0xbc91,0x3fe7 ,0xbc32,0x3fe1 ,
0xbbd4,0x3fdb ,0xbb77,0x3fd4 ,0xbb1b,0x3fcc ,
0xbabf,0x3fc4 ,0xba64,0x3fbb ,0xba09,0x3fb1 ,
0xb9af,0x3fa7 ,0xb956,0x3f9c ,0xb8fd,0x3f91 ,
0xb8a6,0x3f85 ,0xb84f,0x3f78 ,0xb7f8,0x3f6b ,
0xb7a2,0x3f5d ,0xb74d,0x3f4f ,0xb6f9,0x3f40 ,
0xb6a5,0x3f30 ,0xb652,0x3f20 ,0xb600,0x3f0f ,
0xb5af,0x3efd ,0xb55e,0x3eeb ,0xb50e,0x3ed8 ,
0xb4be,0x3ec5 ,0xb470,0x3eb1 ,0xb422,0x3e9d ,
0xb3d5,0x3e88 ,0xb388,0x3e72 ,0xb33d,0x3e5c ,
0xb2f2,0x3e45 ,0xb2a7,0x3e2d ,0xb25e,0x3e15 ,
0xb215,0x3dfc ,0xb1cd,0x3de3 ,0xb186,0x3dc9 ,
0xb140,0x3daf ,0xb0fa,0x3d93 ,0xb0b5,0x3d78 ,
0xb071,0x3d5b ,0xb02d,0x3d3f ,0xafeb,0x3d21 ,
0xafa9,0x3d03 ,0xaf68,0x3ce4 ,0xaf28,0x3cc5 ,
0xaee8,0x3ca5 ,0xaea9,0x3c85 ,0xae6b,0x3c64 ,
0xae2e,0x3c42 ,0xadf2,0x3c20 ,0xadb6,0x3bfd ,
0xad7b,0x3bda ,0xad41,0x3bb6 ,0xad08,0x3b92 ,
0xacd0,0x3b6d ,0xac98,0x3b47 ,0xac61,0x3b21 ,
0xac2b,0x3afa ,0xabf6,0x3ad3 ,0xabc2,0x3aab ,
0xab8e,0x3a82 ,0xab5b,0x3a59 ,0xab29,0x3a30 ,
0xaaf8,0x3a06 ,0xaac8,0x39db ,0xaa98,0x39b0 ,
0xaa6a,0x3984 ,0xaa3c,0x3958 ,0xaa0f,0x392b ,
0xa9e3,0x38fd ,0xa9b7,0x38cf ,0xa98d,0x38a1 ,
0xa963,0x3871 ,0xa93a,0x3842 ,0xa912,0x3812 ,
0xa8eb,0x37e1 ,0xa8c5,0x37b0 ,0xa89f,0x377e ,
0xa87b,0x374b ,0xa857,0x3718 ,0xa834,0x36e5 ,
0xa812,0x36b1 ,0xa7f1,0x367d ,0xa7d0,0x3648 ,
0xa7b1,0x3612 ,0xa792,0x35dc ,0xa774,0x35a5 ,
0xa757,0x356e ,0xa73b,0x3537 ,0xa71f,0x34ff ,
0xa705,0x34c6 ,0xa6eb,0x348d ,0xa6d3,0x3453 ,
0xa6bb,0x3419 ,0xa6a4,0x33df ,0xa68e,0x33a3 ,
0xa678,0x3368 ,0xa664,0x332c ,0xa650,0x32ef ,
0xa63e,0x32b2 ,0xa62c,0x3274 ,0xa61b,0x3236 ,
0xa60b,0x31f8 ,0xa5fb,0x31b9 ,0xa5ed,0x3179 ,
0xa5e0,0x3139 ,0xa5d3,0x30f9 ,0xa5c7,0x30b8 ,
0xa5bc,0x3076 ,0xa5b2,0x3034 ,0xa5a9,0x2ff2 ,
0xa5a1,0x2faf ,0xa599,0x2f6c ,0xa593,0x2f28 ,
0xa58d,0x2ee4 ,0xa588,0x2e9f ,0xa585,0x2e5a ,
0xa581,0x2e15 ,0xa57f,0x2dcf ,0xa57e,0x2d88 ,
0xa57e,0x2d41 ,0xa57e,0x2cfa ,0xa57f,0x2cb2 ,
0xa581,0x2c6a ,0xa585,0x2c21 ,0xa588,0x2bd8 ,
0xa58d,0x2b8f ,0xa593,0x2b45 ,0xa599,0x2afb ,
0xa5a1,0x2ab0 ,0xa5a9,0x2a65 ,0xa5b2,0x2a1a ,
0xa5bc,0x29ce ,0xa5c7,0x2981 ,0xa5d3,0x2935 ,
0xa5e0,0x28e7 ,0xa5ed,0x289a ,0xa5fb,0x284c ,
0xa60b,0x27fe ,0xa61b,0x27af ,0xa62c,0x2760 ,
0xa63e,0x2711 ,0xa650,0x26c1 ,0xa664,0x2671 ,
0xa678,0x2620 ,0xa68e,0x25cf ,0xa6a4,0x257e ,
0xa6bb,0x252c ,0xa6d3,0x24da ,0xa6eb,0x2488 ,
0xa705,0x2435 ,0xa71f,0x23e2 ,0xa73b,0x238e ,
0xa757,0x233b ,0xa774,0x22e7 ,0xa792,0x2292 ,
0xa7b1,0x223d ,0xa7d0,0x21e8 ,0xa7f1,0x2193 ,
0xa812,0x213d ,0xa834,0x20e7 ,0xa857,0x2091 ,
0xa87b,0x203a ,0xa89f,0x1fe3 ,0xa8c5,0x1f8c ,
0xa8eb,0x1f34 ,0xa912,0x1edc ,0xa93a,0x1e84 ,
0xa963,0x1e2b ,0xa98d,0x1dd3 ,0xa9b7,0x1d79 ,
0xa9e3,0x1d20 ,0xaa0f,0x1cc6 ,0xaa3c,0x1c6c ,
0xaa6a,0x1c12 ,0xaa98,0x1bb8 ,0xaac8,0x1b5d ,
0xaaf8,0x1b02 ,0xab29,0x1aa7 ,0xab5b,0x1a4b ,
0xab8e,0x19ef ,0xabc2,0x1993 ,0xabf6,0x1937 ,
0xac2b,0x18db ,0xac61,0x187e ,0xac98,0x1821 ,
0xacd0,0x17c4 ,0xad08,0x1766 ,0xad41,0x1709 ,
0xad7b,0x16ab ,0xadb6,0x164c ,0xadf2,0x15ee ,
0xae2e,0x1590 ,0xae6b,0x1531 ,0xaea9,0x14d2 ,
0xaee8,0x1473 ,0xaf28,0x1413 ,0xaf68,0x13b4 ,
0xafa9,0x1354 ,0xafeb,0x12f4 ,0xb02d,0x1294 ,
0xb071,0x1234 ,0xb0b5,0x11d3 ,0xb0fa,0x1173 ,
0xb140,0x1112 ,0xb186,0x10b1 ,0xb1cd,0x1050 ,
0xb215,0x0fee ,0xb25e,0x0f8d ,0xb2a7,0x0f2b ,
0xb2f2,0x0eca ,0xb33d,0x0e68 ,0xb388,0x0e06 ,
0xb3d5,0x0da4 ,0xb422,0x0d41 ,0xb470,0x0cdf ,
0xb4be,0x0c7c ,0xb50e,0x0c1a ,0xb55e,0x0bb7 ,
0xb5af,0x0b54 ,0xb600,0x0af1 ,0xb652,0x0a8e ,
0xb6a5,0x0a2b ,0xb6f9,0x09c7 ,0xb74d,0x0964 ,
0xb7a2,0x0901 ,0xb7f8,0x089d ,0xb84f,0x0839 ,
0xb8a6,0x07d6 ,0xb8fd,0x0772 ,0xb956,0x070e ,
0xb9af,0x06aa ,0xba09,0x0646 ,0xba64,0x05e2 ,
0xbabf,0x057e ,0xbb1b,0x051a ,0xbb77,0x04b5 ,
0xbbd4,0x0451 ,0xbc32,0x03ed ,0xbc91,0x0388 ,
0xbcf0,0x0324 ,0xbd50,0x02c0 ,0xbdb0,0x025b ,
0xbe11,0x01f7 ,0xbe73,0x0192 ,0xbed5,0x012e ,
0xbf38,0x00c9 ,0xbf9c,0x0065 };
/*
 * Size constant published alongside the t_Q14R_8 coefficient table.
 * NOTE(review): presumably the FFT length in points that the table was
 * generated for (the file is named t_01024_8.c) - confirm with the users
 * of this symbol.
 */
extern const int s_Q14R_8;
const int s_Q14R_8 = 1 << 10; /* 1024 */
/*
 * t_Q14R_8: 2032 16-bit constants laid out as pairs.
 *
 * The values are consistent with Q14 fixed point (0x4000 == 2^14 == 1.0);
 * for example 0x3b21/0x187e are approximately 2^14*cos(pi/8) and
 * 2^14*sin(pi/8). Entries above 0x7fff are negative when read as signed
 * halfwords.
 *
 * NOTE(review): the initializer appears to be organised as three sections
 * of 8, 32 and 128 rows with three pairs per row, followed by a final
 * section of 512 pairs (48 + 192 + 768 + 1024 = 2032 values) -- presumably
 * per-stage twiddle factors; confirm against the routine that reads it.
 */
extern const unsigned short t_Q14R_8[2032];
const unsigned short t_Q14R_8[2032] = {
0x4000,0x0000 ,0x4000,0x0000 ,0x4000,0x0000 ,
0x3b21,0x187e ,0x3ec5,0x0c7c ,0x3537,0x238e ,
0x2d41,0x2d41 ,0x3b21,0x187e ,0x187e,0x3b21 ,
0x187e,0x3b21 ,0x3537,0x238e ,0xf384,0x3ec5 ,
0x0000,0x4000 ,0x2d41,0x2d41 ,0xd2bf,0x2d41 ,
0xe782,0x3b21 ,0x238e,0x3537 ,0xc13b,0x0c7c ,
0xd2bf,0x2d41 ,0x187e,0x3b21 ,0xc4df,0xe782 ,
0xc4df,0x187e ,0x0c7c,0x3ec5 ,0xdc72,0xcac9 ,
0x4000,0x0000 ,0x4000,0x0000 ,0x4000,0x0000 ,
0x3fb1,0x0646 ,0x3fec,0x0324 ,0x3f4f,0x0964 ,
0x3ec5,0x0c7c ,0x3fb1,0x0646 ,0x3d3f,0x1294 ,
0x3d3f,0x1294 ,0x3f4f,0x0964 ,0x39db,0x1b5d ,
0x3b21,0x187e ,0x3ec5,0x0c7c ,0x3537,0x238e ,
0x3871,0x1e2b ,0x3e15,0x0f8d ,0x2f6c,0x2afb ,
0x3537,0x238e ,0x3d3f,0x1294 ,0x289a,0x3179 ,
0x3179,0x289a ,0x3c42,0x1590 ,0x20e7,0x36e5 ,
0x2d41,0x2d41 ,0x3b21,0x187e ,0x187e,0x3b21 ,
0x289a,0x3179 ,0x39db,0x1b5d ,0x0f8d,0x3e15 ,
0x238e,0x3537 ,0x3871,0x1e2b ,0x0646,0x3fb1 ,
0x1e2b,0x3871 ,0x36e5,0x20e7 ,0xfcdc,0x3fec ,
0x187e,0x3b21 ,0x3537,0x238e ,0xf384,0x3ec5 ,
0x1294,0x3d3f ,0x3368,0x2620 ,0xea70,0x3c42 ,
0x0c7c,0x3ec5 ,0x3179,0x289a ,0xe1d5,0x3871 ,
0x0646,0x3fb1 ,0x2f6c,0x2afb ,0xd9e0,0x3368 ,
0x0000,0x4000 ,0x2d41,0x2d41 ,0xd2bf,0x2d41 ,
0xf9ba,0x3fb1 ,0x2afb,0x2f6c ,0xcc98,0x2620 ,
0xf384,0x3ec5 ,0x289a,0x3179 ,0xc78f,0x1e2b ,
0xed6c,0x3d3f ,0x2620,0x3368 ,0xc3be,0x1590 ,
0xe782,0x3b21 ,0x238e,0x3537 ,0xc13b,0x0c7c ,
0xe1d5,0x3871 ,0x20e7,0x36e5 ,0xc014,0x0324 ,
0xdc72,0x3537 ,0x1e2b,0x3871 ,0xc04f,0xf9ba ,
0xd766,0x3179 ,0x1b5d,0x39db ,0xc1eb,0xf073 ,
0xd2bf,0x2d41 ,0x187e,0x3b21 ,0xc4df,0xe782 ,
0xce87,0x289a ,0x1590,0x3c42 ,0xc91b,0xdf19 ,
0xcac9,0x238e ,0x1294,0x3d3f ,0xce87,0xd766 ,
0xc78f,0x1e2b ,0x0f8d,0x3e15 ,0xd505,0xd094 ,
0xc4df,0x187e ,0x0c7c,0x3ec5 ,0xdc72,0xcac9 ,
0xc2c1,0x1294 ,0x0964,0x3f4f ,0xe4a3,0xc625 ,
0xc13b,0x0c7c ,0x0646,0x3fb1 ,0xed6c,0xc2c1 ,
0xc04f,0x0646 ,0x0324,0x3fec ,0xf69c,0xc0b1 ,
0x4000,0x0000 ,0x4000,0x0000 ,0x4000,0x0000 ,
0x3ffb,0x0192 ,0x3fff,0x00c9 ,0x3ff5,0x025b ,
0x3fec,0x0324 ,0x3ffb,0x0192 ,0x3fd4,0x04b5 ,
0x3fd4,0x04b5 ,0x3ff5,0x025b ,0x3f9c,0x070e ,
0x3fb1,0x0646 ,0x3fec,0x0324 ,0x3f4f,0x0964 ,
0x3f85,0x07d6 ,0x3fe1,0x03ed ,0x3eeb,0x0bb7 ,
0x3f4f,0x0964 ,0x3fd4,0x04b5 ,0x3e72,0x0e06 ,
0x3f0f,0x0af1 ,0x3fc4,0x057e ,0x3de3,0x1050 ,
0x3ec5,0x0c7c ,0x3fb1,0x0646 ,0x3d3f,0x1294 ,
0x3e72,0x0e06 ,0x3f9c,0x070e ,0x3c85,0x14d2 ,
0x3e15,0x0f8d ,0x3f85,0x07d6 ,0x3bb6,0x1709 ,
0x3daf,0x1112 ,0x3f6b,0x089d ,0x3ad3,0x1937 ,
0x3d3f,0x1294 ,0x3f4f,0x0964 ,0x39db,0x1b5d ,
0x3cc5,0x1413 ,0x3f30,0x0a2b ,0x38cf,0x1d79 ,
0x3c42,0x1590 ,0x3f0f,0x0af1 ,0x37b0,0x1f8c ,
0x3bb6,0x1709 ,0x3eeb,0x0bb7 ,0x367d,0x2193 ,
0x3b21,0x187e ,0x3ec5,0x0c7c ,0x3537,0x238e ,
0x3a82,0x19ef ,0x3e9d,0x0d41 ,0x33df,0x257e ,
0x39db,0x1b5d ,0x3e72,0x0e06 ,0x3274,0x2760 ,
0x392b,0x1cc6 ,0x3e45,0x0eca ,0x30f9,0x2935 ,
0x3871,0x1e2b ,0x3e15,0x0f8d ,0x2f6c,0x2afb ,
0x37b0,0x1f8c ,0x3de3,0x1050 ,0x2dcf,0x2cb2 ,
0x36e5,0x20e7 ,0x3daf,0x1112 ,0x2c21,0x2e5a ,
0x3612,0x223d ,0x3d78,0x11d3 ,0x2a65,0x2ff2 ,
0x3537,0x238e ,0x3d3f,0x1294 ,0x289a,0x3179 ,
0x3453,0x24da ,0x3d03,0x1354 ,0x26c1,0x32ef ,
0x3368,0x2620 ,0x3cc5,0x1413 ,0x24da,0x3453 ,
0x3274,0x2760 ,0x3c85,0x14d2 ,0x22e7,0x35a5 ,
0x3179,0x289a ,0x3c42,0x1590 ,0x20e7,0x36e5 ,
0x3076,0x29ce ,0x3bfd,0x164c ,0x1edc,0x3812 ,
0x2f6c,0x2afb ,0x3bb6,0x1709 ,0x1cc6,0x392b ,
0x2e5a,0x2c21 ,0x3b6d,0x17c4 ,0x1aa7,0x3a30 ,
0x2d41,0x2d41 ,0x3b21,0x187e ,0x187e,0x3b21 ,
0x2c21,0x2e5a ,0x3ad3,0x1937 ,0x164c,0x3bfd ,
0x2afb,0x2f6c ,0x3a82,0x19ef ,0x1413,0x3cc5 ,
0x29ce,0x3076 ,0x3a30,0x1aa7 ,0x11d3,0x3d78 ,
0x289a,0x3179 ,0x39db,0x1b5d ,0x0f8d,0x3e15 ,
0x2760,0x3274 ,0x3984,0x1c12 ,0x0d41,0x3e9d ,
0x2620,0x3368 ,0x392b,0x1cc6 ,0x0af1,0x3f0f ,
0x24da,0x3453 ,0x38cf,0x1d79 ,0x089d,0x3f6b ,
0x238e,0x3537 ,0x3871,0x1e2b ,0x0646,0x3fb1 ,
0x223d,0x3612 ,0x3812,0x1edc ,0x03ed,0x3fe1 ,
0x20e7,0x36e5 ,0x37b0,0x1f8c ,0x0192,0x3ffb ,
0x1f8c,0x37b0 ,0x374b,0x203a ,0xff37,0x3fff ,
0x1e2b,0x3871 ,0x36e5,0x20e7 ,0xfcdc,0x3fec ,
0x1cc6,0x392b ,0x367d,0x2193 ,0xfa82,0x3fc4 ,
0x1b5d,0x39db ,0x3612,0x223d ,0xf82a,0x3f85 ,
0x19ef,0x3a82 ,0x35a5,0x22e7 ,0xf5d5,0x3f30 ,
0x187e,0x3b21 ,0x3537,0x238e ,0xf384,0x3ec5 ,
0x1709,0x3bb6 ,0x34c6,0x2435 ,0xf136,0x3e45 ,
0x1590,0x3c42 ,0x3453,0x24da ,0xeeee,0x3daf ,
0x1413,0x3cc5 ,0x33df,0x257e ,0xecac,0x3d03 ,
0x1294,0x3d3f ,0x3368,0x2620 ,0xea70,0x3c42 ,
0x1112,0x3daf ,0x32ef,0x26c1 ,0xe83c,0x3b6d ,
0x0f8d,0x3e15 ,0x3274,0x2760 ,0xe611,0x3a82 ,
0x0e06,0x3e72 ,0x31f8,0x27fe ,0xe3ee,0x3984 ,
0x0c7c,0x3ec5 ,0x3179,0x289a ,0xe1d5,0x3871 ,
0x0af1,0x3f0f ,0x30f9,0x2935 ,0xdfc6,0x374b ,
0x0964,0x3f4f ,0x3076,0x29ce ,0xddc3,0x3612 ,
0x07d6,0x3f85 ,0x2ff2,0x2a65 ,0xdbcb,0x34c6 ,
0x0646,0x3fb1 ,0x2f6c,0x2afb ,0xd9e0,0x3368 ,
0x04b5,0x3fd4 ,0x2ee4,0x2b8f ,0xd802,0x31f8 ,
0x0324,0x3fec ,0x2e5a,0x2c21 ,0xd632,0x3076 ,
0x0192,0x3ffb ,0x2dcf,0x2cb2 ,0xd471,0x2ee4 ,
0x0000,0x4000 ,0x2d41,0x2d41 ,0xd2bf,0x2d41 ,
0xfe6e,0x3ffb ,0x2cb2,0x2dcf ,0xd11c,0x2b8f ,
0xfcdc,0x3fec ,0x2c21,0x2e5a ,0xcf8a,0x29ce ,
0xfb4b,0x3fd4 ,0x2b8f,0x2ee4 ,0xce08,0x27fe ,
0xf9ba,0x3fb1 ,0x2afb,0x2f6c ,0xcc98,0x2620 ,
0xf82a,0x3f85 ,0x2a65,0x2ff2 ,0xcb3a,0x2435 ,
0xf69c,0x3f4f ,0x29ce,0x3076 ,0xc9ee,0x223d ,
0xf50f,0x3f0f ,0x2935,0x30f9 ,0xc8b5,0x203a ,
0xf384,0x3ec5 ,0x289a,0x3179 ,0xc78f,0x1e2b ,
0xf1fa,0x3e72 ,0x27fe,0x31f8 ,0xc67c,0x1c12 ,
0xf073,0x3e15 ,0x2760,0x3274 ,0xc57e,0x19ef ,
0xeeee,0x3daf ,0x26c1,0x32ef ,0xc493,0x17c4 ,
0xed6c,0x3d3f ,0x2620,0x3368 ,0xc3be,0x1590 ,
0xebed,0x3cc5 ,0x257e,0x33df ,0xc2fd,0x1354 ,
0xea70,0x3c42 ,0x24da,0x3453 ,0xc251,0x1112 ,
0xe8f7,0x3bb6 ,0x2435,0x34c6 ,0xc1bb,0x0eca ,
0xe782,0x3b21 ,0x238e,0x3537 ,0xc13b,0x0c7c ,
0xe611,0x3a82 ,0x22e7,0x35a5 ,0xc0d0,0x0a2b ,
0xe4a3,0x39db ,0x223d,0x3612 ,0xc07b,0x07d6 ,
0xe33a,0x392b ,0x2193,0x367d ,0xc03c,0x057e ,
0xe1d5,0x3871 ,0x20e7,0x36e5 ,0xc014,0x0324 ,
0xe074,0x37b0 ,0x203a,0x374b ,0xc001,0x00c9 ,
0xdf19,0x36e5 ,0x1f8c,0x37b0 ,0xc005,0xfe6e ,
0xddc3,0x3612 ,0x1edc,0x3812 ,0xc01f,0xfc13 ,
0xdc72,0x3537 ,0x1e2b,0x3871 ,0xc04f,0xf9ba ,
0xdb26,0x3453 ,0x1d79,0x38cf ,0xc095,0xf763 ,
0xd9e0,0x3368 ,0x1cc6,0x392b ,0xc0f1,0xf50f ,
0xd8a0,0x3274 ,0x1c12,0x3984 ,0xc163,0xf2bf ,
0xd766,0x3179 ,0x1b5d,0x39db ,0xc1eb,0xf073 ,
0xd632,0x3076 ,0x1aa7,0x3a30 ,0xc288,0xee2d ,
0xd505,0x2f6c ,0x19ef,0x3a82 ,0xc33b,0xebed ,
0xd3df,0x2e5a ,0x1937,0x3ad3 ,0xc403,0xe9b4 ,
0xd2bf,0x2d41 ,0x187e,0x3b21 ,0xc4df,0xe782 ,
0xd1a6,0x2c21 ,0x17c4,0x3b6d ,0xc5d0,0xe559 ,
0xd094,0x2afb ,0x1709,0x3bb6 ,0xc6d5,0xe33a ,
0xcf8a,0x29ce ,0x164c,0x3bfd ,0xc7ee,0xe124 ,
0xce87,0x289a ,0x1590,0x3c42 ,0xc91b,0xdf19 ,
0xcd8c,0x2760 ,0x14d2,0x3c85 ,0xca5b,0xdd19 ,
0xcc98,0x2620 ,0x1413,0x3cc5 ,0xcbad,0xdb26 ,
0xcbad,0x24da ,0x1354,0x3d03 ,0xcd11,0xd93f ,
0xcac9,0x238e ,0x1294,0x3d3f ,0xce87,0xd766 ,
0xc9ee,0x223d ,0x11d3,0x3d78 ,0xd00e,0xd59b ,
0xc91b,0x20e7 ,0x1112,0x3daf ,0xd1a6,0xd3df ,
0xc850,0x1f8c ,0x1050,0x3de3 ,0xd34e,0xd231 ,
0xc78f,0x1e2b ,0x0f8d,0x3e15 ,0xd505,0xd094 ,
0xc6d5,0x1cc6 ,0x0eca,0x3e45 ,0xd6cb,0xcf07 ,
0xc625,0x1b5d ,0x0e06,0x3e72 ,0xd8a0,0xcd8c ,
0xc57e,0x19ef ,0x0d41,0x3e9d ,0xda82,0xcc21 ,
0xc4df,0x187e ,0x0c7c,0x3ec5 ,0xdc72,0xcac9 ,
0xc44a,0x1709 ,0x0bb7,0x3eeb ,0xde6d,0xc983 ,
0xc3be,0x1590 ,0x0af1,0x3f0f ,0xe074,0xc850 ,
0xc33b,0x1413 ,0x0a2b,0x3f30 ,0xe287,0xc731 ,
0xc2c1,0x1294 ,0x0964,0x3f4f ,0xe4a3,0xc625 ,
0xc251,0x1112 ,0x089d,0x3f6b ,0xe6c9,0xc52d ,
0xc1eb,0x0f8d ,0x07d6,0x3f85 ,0xe8f7,0xc44a ,
0xc18e,0x0e06 ,0x070e,0x3f9c ,0xeb2e,0xc37b ,
0xc13b,0x0c7c ,0x0646,0x3fb1 ,0xed6c,0xc2c1 ,
0xc0f1,0x0af1 ,0x057e,0x3fc4 ,0xefb0,0xc21d ,
0xc0b1,0x0964 ,0x04b5,0x3fd4 ,0xf1fa,0xc18e ,
0xc07b,0x07d6 ,0x03ed,0x3fe1 ,0xf449,0xc115 ,
0xc04f,0x0646 ,0x0324,0x3fec ,0xf69c,0xc0b1 ,
0xc02c,0x04b5 ,0x025b,0x3ff5 ,0xf8f2,0xc064 ,
0xc014,0x0324 ,0x0192,0x3ffb ,0xfb4b,0xc02c ,
0xc005,0x0192 ,0x00c9,0x3fff ,0xfda5,0xc00b ,
0x4000,0x0000 ,0x4000,0x0065 ,0x3fff,0x00c9 ,
0x3ffd,0x012e ,0x3ffb,0x0192 ,0x3ff8,0x01f7 ,
0x3ff5,0x025b ,0x3ff1,0x02c0 ,0x3fec,0x0324 ,
0x3fe7,0x0388 ,0x3fe1,0x03ed ,0x3fdb,0x0451 ,
0x3fd4,0x04b5 ,0x3fcc,0x051a ,0x3fc4,0x057e ,
0x3fbb,0x05e2 ,0x3fb1,0x0646 ,0x3fa7,0x06aa ,
0x3f9c,0x070e ,0x3f91,0x0772 ,0x3f85,0x07d6 ,
0x3f78,0x0839 ,0x3f6b,0x089d ,0x3f5d,0x0901 ,
0x3f4f,0x0964 ,0x3f40,0x09c7 ,0x3f30,0x0a2b ,
0x3f20,0x0a8e ,0x3f0f,0x0af1 ,0x3efd,0x0b54 ,
0x3eeb,0x0bb7 ,0x3ed8,0x0c1a ,0x3ec5,0x0c7c ,
0x3eb1,0x0cdf ,0x3e9d,0x0d41 ,0x3e88,0x0da4 ,
0x3e72,0x0e06 ,0x3e5c,0x0e68 ,0x3e45,0x0eca ,
0x3e2d,0x0f2b ,0x3e15,0x0f8d ,0x3dfc,0x0fee ,
0x3de3,0x1050 ,0x3dc9,0x10b1 ,0x3daf,0x1112 ,
0x3d93,0x1173 ,0x3d78,0x11d3 ,0x3d5b,0x1234 ,
0x3d3f,0x1294 ,0x3d21,0x12f4 ,0x3d03,0x1354 ,
0x3ce4,0x13b4 ,0x3cc5,0x1413 ,0x3ca5,0x1473 ,
0x3c85,0x14d2 ,0x3c64,0x1531 ,0x3c42,0x1590 ,
0x3c20,0x15ee ,0x3bfd,0x164c ,0x3bda,0x16ab ,
0x3bb6,0x1709 ,0x3b92,0x1766 ,0x3b6d,0x17c4 ,
0x3b47,0x1821 ,0x3b21,0x187e ,0x3afa,0x18db ,
0x3ad3,0x1937 ,0x3aab,0x1993 ,0x3a82,0x19ef ,
0x3a59,0x1a4b ,0x3a30,0x1aa7 ,0x3a06,0x1b02 ,
0x39db,0x1b5d ,0x39b0,0x1bb8 ,0x3984,0x1c12 ,
0x3958,0x1c6c ,0x392b,0x1cc6 ,0x38fd,0x1d20 ,
0x38cf,0x1d79 ,0x38a1,0x1dd3 ,0x3871,0x1e2b ,
0x3842,0x1e84 ,0x3812,0x1edc ,0x37e1,0x1f34 ,
0x37b0,0x1f8c ,0x377e,0x1fe3 ,0x374b,0x203a ,
0x3718,0x2091 ,0x36e5,0x20e7 ,0x36b1,0x213d ,
0x367d,0x2193 ,0x3648,0x21e8 ,0x3612,0x223d ,
0x35dc,0x2292 ,0x35a5,0x22e7 ,0x356e,0x233b ,
0x3537,0x238e ,0x34ff,0x23e2 ,0x34c6,0x2435 ,
0x348d,0x2488 ,0x3453,0x24da ,0x3419,0x252c ,
0x33df,0x257e ,0x33a3,0x25cf ,0x3368,0x2620 ,
0x332c,0x2671 ,0x32ef,0x26c1 ,0x32b2,0x2711 ,
0x3274,0x2760 ,0x3236,0x27af ,0x31f8,0x27fe ,
0x31b9,0x284c ,0x3179,0x289a ,0x3139,0x28e7 ,
0x30f9,0x2935 ,0x30b8,0x2981 ,0x3076,0x29ce ,
0x3034,0x2a1a ,0x2ff2,0x2a65 ,0x2faf,0x2ab0 ,
0x2f6c,0x2afb ,0x2f28,0x2b45 ,0x2ee4,0x2b8f ,
0x2e9f,0x2bd8 ,0x2e5a,0x2c21 ,0x2e15,0x2c6a ,
0x2dcf,0x2cb2 ,0x2d88,0x2cfa ,0x2d41,0x2d41 ,
0x2cfa,0x2d88 ,0x2cb2,0x2dcf ,0x2c6a,0x2e15 ,
0x2c21,0x2e5a ,0x2bd8,0x2e9f ,0x2b8f,0x2ee4 ,
0x2b45,0x2f28 ,0x2afb,0x2f6c ,0x2ab0,0x2faf ,
0x2a65,0x2ff2 ,0x2a1a,0x3034 ,0x29ce,0x3076 ,
0x2981,0x30b8 ,0x2935,0x30f9 ,0x28e7,0x3139 ,
0x289a,0x3179 ,0x284c,0x31b9 ,0x27fe,0x31f8 ,
0x27af,0x3236 ,0x2760,0x3274 ,0x2711,0x32b2 ,
0x26c1,0x32ef ,0x2671,0x332c ,0x2620,0x3368 ,
0x25cf,0x33a3 ,0x257e,0x33df ,0x252c,0x3419 ,
0x24da,0x3453 ,0x2488,0x348d ,0x2435,0x34c6 ,
0x23e2,0x34ff ,0x238e,0x3537 ,0x233b,0x356e ,
0x22e7,0x35a5 ,0x2292,0x35dc ,0x223d,0x3612 ,
0x21e8,0x3648 ,0x2193,0x367d ,0x213d,0x36b1 ,
0x20e7,0x36e5 ,0x2091,0x3718 ,0x203a,0x374b ,
0x1fe3,0x377e ,0x1f8c,0x37b0 ,0x1f34,0x37e1 ,
0x1edc,0x3812 ,0x1e84,0x3842 ,0x1e2b,0x3871 ,
0x1dd3,0x38a1 ,0x1d79,0x38cf ,0x1d20,0x38fd ,
0x1cc6,0x392b ,0x1c6c,0x3958 ,0x1c12,0x3984 ,
0x1bb8,0x39b0 ,0x1b5d,0x39db ,0x1b02,0x3a06 ,
0x1aa7,0x3a30 ,0x1a4b,0x3a59 ,0x19ef,0x3a82 ,
0x1993,0x3aab ,0x1937,0x3ad3 ,0x18db,0x3afa ,
0x187e,0x3b21 ,0x1821,0x3b47 ,0x17c4,0x3b6d ,
0x1766,0x3b92 ,0x1709,0x3bb6 ,0x16ab,0x3bda ,
0x164c,0x3bfd ,0x15ee,0x3c20 ,0x1590,0x3c42 ,
0x1531,0x3c64 ,0x14d2,0x3c85 ,0x1473,0x3ca5 ,
0x1413,0x3cc5 ,0x13b4,0x3ce4 ,0x1354,0x3d03 ,
0x12f4,0x3d21 ,0x1294,0x3d3f ,0x1234,0x3d5b ,
0x11d3,0x3d78 ,0x1173,0x3d93 ,0x1112,0x3daf ,
0x10b1,0x3dc9 ,0x1050,0x3de3 ,0x0fee,0x3dfc ,
0x0f8d,0x3e15 ,0x0f2b,0x3e2d ,0x0eca,0x3e45 ,
0x0e68,0x3e5c ,0x0e06,0x3e72 ,0x0da4,0x3e88 ,
0x0d41,0x3e9d ,0x0cdf,0x3eb1 ,0x0c7c,0x3ec5 ,
0x0c1a,0x3ed8 ,0x0bb7,0x3eeb ,0x0b54,0x3efd ,
0x0af1,0x3f0f ,0x0a8e,0x3f20 ,0x0a2b,0x3f30 ,
0x09c7,0x3f40 ,0x0964,0x3f4f ,0x0901,0x3f5d ,
0x089d,0x3f6b ,0x0839,0x3f78 ,0x07d6,0x3f85 ,
0x0772,0x3f91 ,0x070e,0x3f9c ,0x06aa,0x3fa7 ,
0x0646,0x3fb1 ,0x05e2,0x3fbb ,0x057e,0x3fc4 ,
0x051a,0x3fcc ,0x04b5,0x3fd4 ,0x0451,0x3fdb ,
0x03ed,0x3fe1 ,0x0388,0x3fe7 ,0x0324,0x3fec ,
0x02c0,0x3ff1 ,0x025b,0x3ff5 ,0x01f7,0x3ff8 ,
0x0192,0x3ffb ,0x012e,0x3ffd ,0x00c9,0x3fff ,
0x0065,0x4000 ,0x0000,0x4000 ,0xff9b,0x4000 ,
0xff37,0x3fff ,0xfed2,0x3ffd ,0xfe6e,0x3ffb ,
0xfe09,0x3ff8 ,0xfda5,0x3ff5 ,0xfd40,0x3ff1 ,
0xfcdc,0x3fec ,0xfc78,0x3fe7 ,0xfc13,0x3fe1 ,
0xfbaf,0x3fdb ,0xfb4b,0x3fd4 ,0xfae6,0x3fcc ,
0xfa82,0x3fc4 ,0xfa1e,0x3fbb ,0xf9ba,0x3fb1 ,
0xf956,0x3fa7 ,0xf8f2,0x3f9c ,0xf88e,0x3f91 ,
0xf82a,0x3f85 ,0xf7c7,0x3f78 ,0xf763,0x3f6b ,
0xf6ff,0x3f5d ,0xf69c,0x3f4f ,0xf639,0x3f40 ,
0xf5d5,0x3f30 ,0xf572,0x3f20 ,0xf50f,0x3f0f ,
0xf4ac,0x3efd ,0xf449,0x3eeb ,0xf3e6,0x3ed8 ,
0xf384,0x3ec5 ,0xf321,0x3eb1 ,0xf2bf,0x3e9d ,
0xf25c,0x3e88 ,0xf1fa,0x3e72 ,0xf198,0x3e5c ,
0xf136,0x3e45 ,0xf0d5,0x3e2d ,0xf073,0x3e15 ,
0xf012,0x3dfc ,0xefb0,0x3de3 ,0xef4f,0x3dc9 ,
0xeeee,0x3daf ,0xee8d,0x3d93 ,0xee2d,0x3d78 ,
0xedcc,0x3d5b ,0xed6c,0x3d3f ,0xed0c,0x3d21 ,
0xecac,0x3d03 ,0xec4c,0x3ce4 ,0xebed,0x3cc5 ,
0xeb8d,0x3ca5 ,0xeb2e,0x3c85 ,0xeacf,0x3c64 ,
0xea70,0x3c42 ,0xea12,0x3c20 ,0xe9b4,0x3bfd ,
0xe955,0x3bda ,0xe8f7,0x3bb6 ,0xe89a,0x3b92 ,
0xe83c,0x3b6d ,0xe7df,0x3b47 ,0xe782,0x3b21 ,
0xe725,0x3afa ,0xe6c9,0x3ad3 ,0xe66d,0x3aab ,
0xe611,0x3a82 ,0xe5b5,0x3a59 ,0xe559,0x3a30 ,
0xe4fe,0x3a06 ,0xe4a3,0x39db ,0xe448,0x39b0 ,
0xe3ee,0x3984 ,0xe394,0x3958 ,0xe33a,0x392b ,
0xe2e0,0x38fd ,0xe287,0x38cf ,0xe22d,0x38a1 ,
0xe1d5,0x3871 ,0xe17c,0x3842 ,0xe124,0x3812 ,
0xe0cc,0x37e1 ,0xe074,0x37b0 ,0xe01d,0x377e ,
0xdfc6,0x374b ,0xdf6f,0x3718 ,0xdf19,0x36e5 ,
0xdec3,0x36b1 ,0xde6d,0x367d ,0xde18,0x3648 ,
0xddc3,0x3612 ,0xdd6e,0x35dc ,0xdd19,0x35a5 ,
0xdcc5,0x356e ,0xdc72,0x3537 ,0xdc1e,0x34ff ,
0xdbcb,0x34c6 ,0xdb78,0x348d ,0xdb26,0x3453 ,
0xdad4,0x3419 ,0xda82,0x33df ,0xda31,0x33a3 ,
0xd9e0,0x3368 ,0xd98f,0x332c ,0xd93f,0x32ef ,
0xd8ef,0x32b2 ,0xd8a0,0x3274 ,0xd851,0x3236 ,
0xd802,0x31f8 ,0xd7b4,0x31b9 ,0xd766,0x3179 ,
0xd719,0x3139 ,0xd6cb,0x30f9 ,0xd67f,0x30b8 ,
0xd632,0x3076 ,0xd5e6,0x3034 ,0xd59b,0x2ff2 ,
0xd550,0x2faf ,0xd505,0x2f6c ,0xd4bb,0x2f28 ,
0xd471,0x2ee4 ,0xd428,0x2e9f ,0xd3df,0x2e5a ,
0xd396,0x2e15 ,0xd34e,0x2dcf ,0xd306,0x2d88 ,
0xd2bf,0x2d41 ,0xd278,0x2cfa ,0xd231,0x2cb2 ,
0xd1eb,0x2c6a ,0xd1a6,0x2c21 ,0xd161,0x2bd8 ,
0xd11c,0x2b8f ,0xd0d8,0x2b45 ,0xd094,0x2afb ,
0xd051,0x2ab0 ,0xd00e,0x2a65 ,0xcfcc,0x2a1a ,
0xcf8a,0x29ce ,0xcf48,0x2981 ,0xcf07,0x2935 ,
0xcec7,0x28e7 ,0xce87,0x289a ,0xce47,0x284c ,
0xce08,0x27fe ,0xcdca,0x27af ,0xcd8c,0x2760 ,
0xcd4e,0x2711 ,0xcd11,0x26c1 ,0xccd4,0x2671 ,
0xcc98,0x2620 ,0xcc5d,0x25cf ,0xcc21,0x257e ,
0xcbe7,0x252c ,0xcbad,0x24da ,0xcb73,0x2488 ,
0xcb3a,0x2435 ,0xcb01,0x23e2 ,0xcac9,0x238e ,
0xca92,0x233b ,0xca5b,0x22e7 ,0xca24,0x2292 ,
0xc9ee,0x223d ,0xc9b8,0x21e8 ,0xc983,0x2193 ,
0xc94f,0x213d ,0xc91b,0x20e7 ,0xc8e8,0x2091 ,
0xc8b5,0x203a ,0xc882,0x1fe3 ,0xc850,0x1f8c ,
0xc81f,0x1f34 ,0xc7ee,0x1edc ,0xc7be,0x1e84 ,
0xc78f,0x1e2b ,0xc75f,0x1dd3 ,0xc731,0x1d79 ,
0xc703,0x1d20 ,0xc6d5,0x1cc6 ,0xc6a8,0x1c6c ,
0xc67c,0x1c12 ,0xc650,0x1bb8 ,0xc625,0x1b5d ,
0xc5fa,0x1b02 ,0xc5d0,0x1aa7 ,0xc5a7,0x1a4b ,
0xc57e,0x19ef ,0xc555,0x1993 ,0xc52d,0x1937 ,
0xc506,0x18db ,0xc4df,0x187e ,0xc4b9,0x1821 ,
0xc493,0x17c4 ,0xc46e,0x1766 ,0xc44a,0x1709 ,
0xc426,0x16ab ,0xc403,0x164c ,0xc3e0,0x15ee ,
0xc3be,0x1590 ,0xc39c,0x1531 ,0xc37b,0x14d2 ,
0xc35b,0x1473 ,0xc33b,0x1413 ,0xc31c,0x13b4 ,
0xc2fd,0x1354 ,0xc2df,0x12f4 ,0xc2c1,0x1294 ,
0xc2a5,0x1234 ,0xc288,0x11d3 ,0xc26d,0x1173 ,
0xc251,0x1112 ,0xc237,0x10b1 ,0xc21d,0x1050 ,
0xc204,0x0fee ,0xc1eb,0x0f8d ,0xc1d3,0x0f2b ,
0xc1bb,0x0eca ,0xc1a4,0x0e68 ,0xc18e,0x0e06 ,
0xc178,0x0da4 ,0xc163,0x0d41 ,0xc14f,0x0cdf ,
0xc13b,0x0c7c ,0xc128,0x0c1a ,0xc115,0x0bb7 ,
0xc103,0x0b54 ,0xc0f1,0x0af1 ,0xc0e0,0x0a8e ,
0xc0d0,0x0a2b ,0xc0c0,0x09c7 ,0xc0b1,0x0964 ,
0xc0a3,0x0901 ,0xc095,0x089d ,0xc088,0x0839 ,
0xc07b,0x07d6 ,0xc06f,0x0772 ,0xc064,0x070e ,
0xc059,0x06aa ,0xc04f,0x0646 ,0xc045,0x05e2 ,
0xc03c,0x057e ,0xc034,0x051a ,0xc02c,0x04b5 ,
0xc025,0x0451 ,0xc01f,0x03ed ,0xc019,0x0388 ,
0xc014,0x0324 ,0xc00f,0x02c0 ,0xc00b,0x025b ,
0xc008,0x01f7 ,0xc005,0x0192 ,0xc003,0x012e ,
0xc001,0x00c9 ,0xc000,0x0065 };

View File

@ -1,19 +0,0 @@
/*
* Copyright (C) ARM Limited 1998-2000. All rights reserved.
*
* t_rad.c
*
*/
/*
 * Two-entry Q14 constant pair for the "S" table family.
 * 0x2d41 == 11585, i.e. 2^14 / sqrt(2) rounded to an integer.
 * NOTE(review): presumably the radix-8 rotation constant for the routines
 * that use the t_Q14S_* tables -- confirm against its users.
 */
extern const unsigned short t_Q14S_rad8[2];
const unsigned short t_Q14S_rad8[2] = {
    0x0000,
    0x2d41
};
/*
extern const int t_Q30S_rad8[2];
const int t_Q30S_rad8[2] = { 0x00000000,0x2d413ccd };
*/
/*
 * Two-entry Q14 constant pair for the "R" table family; both entries are
 * 0x2d41 == 11585, i.e. 2^14 / sqrt(2) rounded to an integer.
 * NOTE(review): presumably the radix-8 rotation constant (cos(pi/4) and
 * sin(pi/4)) -- confirm against its users.
 */
extern const unsigned short t_Q14R_rad8[2];
const unsigned short t_Q14R_rad8[2] = {
    0x2d41,
    0x2d41
};
/*
extern const int t_Q30R_rad8[2];
const int t_Q30R_rad8[2] = { 0x2d413ccd,0x2d413ccd };
*/

View File

@ -1,227 +0,0 @@
@ FFT_4OFQ14
@
@ Register use on entry, as read from the code below:
@   r0 = base address of the input; read as pairs of signed halfwords
@   r1 = base address of the output; written as pairs of halfwords
@   r2 = length argument; compared against the word stored at s_Q14S_8
@ Returns in r0: 1 if r2 is greater than that word (nothing is written),
@ otherwise 0.
@
@ NOTE(review): the structure looks like a forward complex FFT on 16-bit
@ Q14 data: one radix-8 pass that reads the input in bit-reversed order,
@ followed by radix-4 passes performed in place on the output buffer.
@ Confirm against the caller before relying on this description.
.globl FFT_4OFQ14
FFT_4OFQ14:
@ Save callee-saved registers and the return address.
stmdb sp!, {r4 - r11, lr}
@ Early exit with r0 = 1 when r2 > *s_Q14S_8.
ldr lr, =s_Q14S_8
ldr lr, [lr]
cmp r2, lr
movgt r0, #1
ldmgtia sp!, {r4 - r11, pc}
@ Keep the original r1/r2; they are popped again before the LBL6 passes.
stmdb sp!, {r1, r2}
@ r3 = input index for the first pass (starts at 0).
mov r3, #0
@ No-op (r2 moved onto itself).
mov r2, r2
LBL1:
@ ---- First pass, first group of four pairs --------------------------------
@ r12 = r0 + 4*r3 + r2/2; then four (first, second) halfword pairs are
@ loaded, r2 bytes apart, via post-indexed loads.
add r12, r0, r3, lsl #2
add r12, r12, r2, lsr #1
ldrsh r5, [r12, #2]
ldrsh r4, [r12], +r2
ldrsh r9, [r12, #2]
ldrsh r8, [r12], +r2
ldrsh r7, [r12, #2]
ldrsh r6, [r12], +r2
ldrsh r11, [r12, #2]
ldrsh r10, [r12], +r2
@ Sum/difference butterflies on the four pairs just loaded.
add r4, r4, r6
add r5, r5, r7
sub r6, r4, r6, lsl #1
sub r7, r5, r7, lsl #1
sub r12, r8, r10
sub lr, r9, r11
add r10, r8, r10
add r11, r9, r11
sub r9, r4, r10
sub r8, r5, r11
add r4, r4, r10
add r5, r5, r11
sub r10, r6, lr
add r11, r7, r12
add r6, r6, lr
sub r7, r7, r12
@ lr = first halfword of t_Q14R_rad8; (r6,r7) and (r10,r11) are replaced by
@ sums/differences of their components multiplied by that constant.
@ The products are scaled back later (the "asr #17"/"asr #16" operands).
ldr lr, =t_Q14R_rad8
ldrsh lr, [lr]
stmdb sp!, {r2}
add r12, r6, r7
mul r6, r12, lr
rsb r12, r12, r7, lsl #1
mul r7, r12, lr
sub r12, r11, r10
mul r10, r12, lr
sub r12, r12, r11, lsl #1
mul r11, r12, lr
ldmia sp!, {r2}
@ Park the eight intermediate values; they are popped two at a time below.
stmdb sp!, {r4 - r11}
@ ---- First pass, second group of four pairs -------------------------------
@ Same access pattern as above, but starting at r0 + 4*r3 (no r2/2 offset).
add r4, r0, r3, lsl #2
ldrsh r7, [r4, #2]
ldrsh r6, [r4], +r2
ldrsh r11, [r4, #2]
ldrsh r10, [r4], +r2
ldrsh r9, [r4, #2]
ldrsh r8, [r4], +r2
ldrsh lr, [r4, #2]
ldrsh r12, [r4], +r2
@ Butterflies with every term shifted right by 3 (divide by 8); the
@ "x + y asr #3" then "x - y asr #2" pairs yield sum and difference.
mov r7, r7, asr #3
mov r6, r6, asr #3
add r6, r6, r8, asr #3
add r7, r7, r9, asr #3
sub r8, r6, r8, asr #2
sub r9, r7, r9, asr #2
sub r4, r10, r12
sub r5, r11, lr
add r10, r10, r12
add r11, r11, lr
add r6, r6, r10, asr #3
add r7, r7, r11, asr #3
sub r10, r6, r10, asr #2
sub r11, r7, r11, asr #2
sub r12, r8, r5, asr #3
add lr, r9, r4, asr #3
add r8, r8, r5, asr #3
sub r9, r9, r4, asr #3
@ ---- Combine with the parked values and store eight output pairs ----------
@ Unmultiplied values are combined with asr #3/#2; values that went through
@ the mul above use asr #17/#16 (14 extra bits for the Q14 constant).
ldmia sp!, {r4, r5}
add r6, r6, r4, asr #3
add r7, r7, r5, asr #3
sub r4, r6, r4, asr #2
sub r5, r7, r5, asr #2
strh r7, [r1, #2]
strh r6, [r1], #4
ldmia sp!, {r6, r7}
add r8, r8, r6, asr #17
add r9, r9, r7, asr #17
sub r6, r8, r6, asr #16
sub r7, r9, r7, asr #16
strh r9, [r1, #2]
strh r8, [r1], #4
ldmia sp!, {r8, r9}
add r10, r10, r8, asr #3
sub r11, r11, r9, asr #3
sub r8, r10, r8, asr #2
add r9, r11, r9, asr #2
strh r11, [r1, #2]
strh r10, [r1], #4
ldmia sp!, {r10, r11}
add r12, r12, r10, asr #17
add lr, lr, r11, asr #17
sub r10, r12, r10, asr #16
sub r11, lr, r11, asr #16
strh lr, [r1, #2]
strh r12, [r1], #4
strh r5, [r1, #2]
strh r4, [r1], #4
strh r7, [r1, #2]
strh r6, [r1], #4
strh r9, [r1, #2]
strh r8, [r1], #4
strh r11, [r1, #2]
strh r10, [r1], #4
@ ---- Advance r3 as a reversed-carry counter -------------------------------
@ Toggle bit (r2 >> 4); if it is now set, process the next group. If it was
@ cleared, carry into bit (r2 >> 5), then (r2 >> 6), (r2 >> 7), ... until a
@ bit becomes set or the mask in r12 reaches zero (first pass finished).
eor r3, r3, r2, lsr #4
tst r3, r2, lsr #4
bne LBL1
eor r3, r3, r2, lsr #5
tst r3, r2, lsr #5
bne LBL1
mov r12, r2, lsr #6
LBL2:
eor r3, r3, r12
tst r3, r12
bne LBL1
movs r12, r12, lsr #1
bne LBL2
@ ---- Set-up for the remaining passes --------------------------------------
@ Restore the original r1/r2. r3 = r2 / 8, r2 = 0x20 (byte distance between
@ the four pairs of one butterfly in the first LBL6 pass), r0 = t_Q14S_8.
@ When r3 == 1 there is nothing left to do.
ldmia sp!, {r1, r2}
mov r3, r2, lsr #3
mov r2, #0x20
ldr r0, =t_Q14S_8
cmp r3, #1
beq LBL3
LBL6:
@ One pass: r3 /= 4, save r1 and r3, point r1 at base + 3*r2.
mov r3, r3, lsr #2
stmdb sp!, {r1, r3}
add r12, r2, r2, lsl #1
add r1, r1, r12
@ "#1, 16" is the immediate 1 rotated right by 16, i.e. 0x10000. The upper
@ halfword of r3 serves as the inner-loop counter, the lower halfword as
@ the outer-loop counter.
sub r3, r3, #1, 16
LBL5:
@ Upper halfword of r3 += r2 / 4 (so it becomes r2/4 - 1).
add r3, r3, r2, lsl #14
LBL4:
@ Load three halfword pairs from the table at r0 (r0 advances by 12 bytes).
ldrsh r6, [r0], #2
ldrsh r7, [r0], #2
ldrsh r8, [r0], #2
ldrsh r9, [r0], #2
ldrsh r10, [r0], #2
ldrsh r11, [r0], #2
@ Data pair at r1 (then r1 -= r2), combined with table pair (r10,r11)
@ using one mul and two mla.
ldrsh r5, [r1, #2]
ldrsh r4, [r1], -r2
sub lr, r5, r4
mul r12, lr, r11
add lr, r10, r11, lsl #1
mla r11, r5, r10, r12
mla r10, r4, lr, r12
@ Next data pair, combined with table pair (r8,r9).
ldrsh r5, [r1, #2]
ldrsh r4, [r1], -r2
sub lr, r5, r4
mul r12, lr, r9
add lr, r8, r9, lsl #1
mla r9, r5, r8, r12
mla r8, r4, lr, r12
@ Next data pair, combined with table pair (r6,r7).
ldrsh r5, [r1, #2]
ldrsh r4, [r1], -r2
sub lr, r5, r4
mul r12, lr, r7
add lr, r6, r7, lsl #1
mla r7, r5, r6, r12
mla r6, r4, lr, r12
@ Fourth data pair is used without a table multiply; it is shifted right by
@ 2, and the products are shifted right by 16 when combined.
ldrsh r5, [r1, #2]
ldrsh r4, [r1]
mov r5, r5, asr #2
mov r4, r4, asr #2
add r12, r4, r6, asr #16
add lr, r5, r7, asr #16
sub r4, r4, r6, asr #16
sub r5, r5, r7, asr #16
add r6, r8, r10
add r7, r9, r11
sub r8, r8, r10
sub r9, r9, r11
@ Store the four results back in place, r2 bytes apart; the last store
@ advances r1 by 4 so the next iteration handles the following pair.
add r10, r12, r6, asr #16
add r11, lr, r7, asr #16
strh r11, [r1, #2]
strh r10, [r1], +r2
add r10, r4, r9, asr #16
sub r11, r5, r8, asr #16
strh r11, [r1, #2]
strh r10, [r1], +r2
sub r10, r12, r6, asr #16
sub r11, lr, r7, asr #16
strh r11, [r1, #2]
strh r10, [r1], +r2
sub r10, r4, r9, asr #16
add r11, r5, r8, asr #16
strh r11, [r1, #2]
strh r10, [r1], #4
@ Inner loop: decrement the upper-halfword counter, repeat while >= 0.
subs r3, r3, #1, 16
bge LBL4
@ Outer loop: skip r1 ahead by 3*r2, rewind the table pointer by 3*r2, and
@ repeat while the lower halfword of r3 is non-zero.
add r12, r2, r2, lsl #1
add r1, r1, r12
sub r0, r0, r12
sub r3, r3, #1
movs lr, r3, lsl #16
bne LBL5
@ End of pass: move the table pointer forward by 3*r2, restore r1/r3,
@ multiply the stride by 4 and run another pass while r3 > 2.
add r0, r0, r12
ldmia sp!, {r1, r3}
mov r2, r2, lsl #2
cmp r3, #2
bgt LBL6
LBL3:
@ Success: return 0 and restore the registers saved on entry.
mov r0, #0
ldmia sp!, {r4 - r11, pc}
@ NOTE(review): the three lines below come after the return and cannot be
@ reached by fall-through; they look like literal-pool words rendered as
@ instructions -- confirm before editing.
andeq r3, r1, r0, lsr #32
andeq r10, r1, r12, ror #31
andeq r3, r1, r8, lsr #32

View File

@ -1,221 +0,0 @@
@ FFT_4OIQ14
@
@ Register use on entry, as read from the code below:
@   r0 = base address of the input; read as pairs of signed halfwords
@   r1 = base address of the output; written as pairs of halfwords
@   r2 = length argument; compared against the word stored at s_Q14S_8
@ Returns in r0: 1 if r2 is greater than that word (nothing is written),
@ otherwise 0.
@
@ NOTE(review): the structure looks like an inverse complex FFT on 16-bit
@ Q14 data: one radix-8 pass that reads the input in bit-reversed order,
@ followed by radix-4 passes performed in place on the output buffer. No
@ per-pass down-scaling is applied here (products are only shifted back by
@ the 14 bits of the Q14 constants). Confirm against the caller.
.globl FFT_4OIQ14
FFT_4OIQ14:
@ Save callee-saved registers and the return address.
stmdb sp!, {r4 - r11, lr}
@ Early exit with r0 = 1 when r2 > *s_Q14S_8.
ldr lr, =s_Q14S_8
ldr lr, [lr]
cmp r2, lr
movgt r0, #1
ldmgtia sp!, {r4 - r11, pc}
@ Keep the original r1/r2; they are popped again before the LBL6 passes.
stmdb sp!, {r1, r2}
@ r3 = input index for the first pass (starts at 0).
mov r3, #0
@ No-op (r2 moved onto itself).
mov r2, r2
LBL1:
@ ---- First pass, first group of four pairs --------------------------------
@ r12 = r0 + 4*r3 + r2/2; then four (first, second) halfword pairs are
@ loaded, r2 bytes apart, via post-indexed loads.
add r12, r0, r3, lsl #2
add r12, r12, r2, lsr #1
ldrsh r5, [r12, #2]
ldrsh r4, [r12], +r2
ldrsh r9, [r12, #2]
ldrsh r8, [r12], +r2
ldrsh r7, [r12, #2]
ldrsh r6, [r12], +r2
ldrsh r11, [r12, #2]
ldrsh r10, [r12], +r2
@ Sum/difference butterflies on the four pairs just loaded.
add r4, r4, r6
add r5, r5, r7
sub r6, r4, r6, lsl #1
sub r7, r5, r7, lsl #1
sub r12, r8, r10
sub lr, r9, r11
add r10, r8, r10
add r11, r9, r11
sub r9, r4, r10
sub r8, r5, r11
add r4, r4, r10
add r5, r5, r11
add r10, r6, lr
sub r11, r7, r12
sub r6, r6, lr
add r7, r7, r12
@ lr = first halfword of t_Q14R_rad8; (r6,r7) and (r10,r11) are replaced by
@ sums/differences of their components multiplied by that constant.
@ The products are scaled back later (the "asr #14"/"asr #13" operands).
ldr lr, =t_Q14R_rad8
ldrsh lr, [lr]
stmdb sp!, {r2}
sub r12, r6, r7
mul r6, r12, lr
add r12, r12, r7, lsl #1
mul r7, r12, lr
sub r12, r10, r11
mul r11, r12, lr
sub r12, r12, r10, lsl #1
mul r10, r12, lr
ldmia sp!, {r2}
@ Park the eight intermediate values; they are popped two at a time below.
stmdb sp!, {r4 - r11}
@ ---- First pass, second group of four pairs -------------------------------
@ Same access pattern as above, but starting at r0 + 4*r3 (no r2/2 offset).
add r4, r0, r3, lsl #2
ldrsh r7, [r4, #2]
ldrsh r6, [r4], +r2
ldrsh r11, [r4, #2]
ldrsh r10, [r4], +r2
ldrsh r9, [r4, #2]
ldrsh r8, [r4], +r2
ldrsh lr, [r4, #2]
ldrsh r12, [r4], +r2
@ Unscaled butterflies; the "x + y" then "x - y lsl #1" pairs yield sum and
@ difference.
add r6, r6, r8
add r7, r7, r9
sub r8, r6, r8, lsl #1
sub r9, r7, r9, lsl #1
sub r4, r10, r12
sub r5, r11, lr
add r10, r10, r12
add r11, r11, lr
add r6, r6, r10
add r7, r7, r11
sub r10, r6, r10, lsl #1
sub r11, r7, r11, lsl #1
add r12, r8, r5
sub lr, r9, r4
sub r8, r8, r5
add r9, r9, r4
@ ---- Combine with the parked values and store eight output pairs ----------
@ Unmultiplied values are combined directly; values that went through the
@ mul above use asr #14/#13 to remove the Q14 factor.
ldmia sp!, {r4, r5}
add r6, r6, r4
add r7, r7, r5
sub r4, r6, r4, lsl #1
sub r5, r7, r5, lsl #1
strh r7, [r1, #2]
strh r6, [r1], #4
ldmia sp!, {r6, r7}
add r8, r8, r6, asr #14
add r9, r9, r7, asr #14
sub r6, r8, r6, asr #13
sub r7, r9, r7, asr #13
strh r9, [r1, #2]
strh r8, [r1], #4
ldmia sp!, {r8, r9}
sub r10, r10, r8
add r11, r11, r9
add r8, r10, r8, lsl #1
sub r9, r11, r9, lsl #1
strh r11, [r1, #2]
strh r10, [r1], #4
ldmia sp!, {r10, r11}
add r12, r12, r10, asr #14
add lr, lr, r11, asr #14
sub r10, r12, r10, asr #13
sub r11, lr, r11, asr #13
strh lr, [r1, #2]
strh r12, [r1], #4
strh r5, [r1, #2]
strh r4, [r1], #4
strh r7, [r1, #2]
strh r6, [r1], #4
strh r9, [r1, #2]
strh r8, [r1], #4
strh r11, [r1, #2]
strh r10, [r1], #4
@ ---- Advance r3 as a reversed-carry counter -------------------------------
@ Toggle bit (r2 >> 4); if it is now set, process the next group. If it was
@ cleared, carry into bit (r2 >> 5), then (r2 >> 6), (r2 >> 7), ... until a
@ bit becomes set or the mask in r12 reaches zero (first pass finished).
eor r3, r3, r2, lsr #4
tst r3, r2, lsr #4
bne LBL1
eor r3, r3, r2, lsr #5
tst r3, r2, lsr #5
bne LBL1
mov r12, r2, lsr #6
LBL2:
eor r3, r3, r12
tst r3, r12
bne LBL1
movs r12, r12, lsr #1
bne LBL2
@ ---- Set-up for the remaining passes --------------------------------------
@ Restore the original r1/r2. r3 = r2 / 8, r2 = 0x20 (byte distance between
@ the four pairs of one butterfly in the first LBL6 pass), r0 = t_Q14S_8.
@ When r3 == 1 there is nothing left to do.
ldmia sp!, {r1, r2}
mov r3, r2, lsr #3
mov r2, #0x20
ldr r0, =t_Q14S_8
cmp r3, #1
beq LBL3
LBL6:
@ One pass: r3 /= 4, save r1 and r3, point r1 at base + 3*r2.
mov r3, r3, lsr #2
stmdb sp!, {r1, r3}
add r12, r2, r2, lsl #1
add r1, r1, r12
@ "#1, 16" is the immediate 1 rotated right by 16, i.e. 0x10000. The upper
@ halfword of r3 serves as the inner-loop counter, the lower halfword as
@ the outer-loop counter.
sub r3, r3, #1, 16
LBL5:
@ Upper halfword of r3 += r2 / 4 (so it becomes r2/4 - 1).
add r3, r3, r2, lsl #14
LBL4:
@ Load three halfword pairs from the table at r0 (r0 advances by 12 bytes).
ldrsh r6, [r0], #2
ldrsh r7, [r0], #2
ldrsh r8, [r0], #2
ldrsh r9, [r0], #2
ldrsh r10, [r0], #2
ldrsh r11, [r0], #2
@ Data pair at r1 (then r1 -= r2), combined with table pair (r10,r11)
@ using one mul and two mla.
ldrsh r5, [r1, #2]
ldrsh r4, [r1], -r2
sub lr, r4, r5
mul r12, lr, r11
add r11, r10, r11, lsl #1
mla r10, r4, r10, r12
mla r11, r5, r11, r12
@ Next data pair, combined with table pair (r8,r9).
ldrsh r5, [r1, #2]
ldrsh r4, [r1], -r2
sub lr, r4, r5
mul r12, lr, r9
add r9, r8, r9, lsl #1
mla r8, r4, r8, r12
mla r9, r5, r9, r12
@ Next data pair, combined with table pair (r6,r7).
ldrsh r5, [r1, #2]
ldrsh r4, [r1], -r2
sub lr, r4, r5
mul r12, lr, r7
add r7, r6, r7, lsl #1
mla r6, r4, r6, r12
mla r7, r5, r7, r12
@ Fourth data pair is used as loaded (no table multiply, no shift); the
@ products are shifted right by 14 when combined.
ldrsh r5, [r1, #2]
ldrsh r4, [r1]
add r12, r4, r6, asr #14
add lr, r5, r7, asr #14
sub r4, r4, r6, asr #14
sub r5, r5, r7, asr #14
add r6, r8, r10
add r7, r9, r11
sub r8, r8, r10
sub r9, r9, r11
@ Store the four results back in place, r2 bytes apart; the last store
@ advances r1 by 4 so the next iteration handles the following pair.
add r10, r12, r6, asr #14
add r11, lr, r7, asr #14
strh r11, [r1, #2]
strh r10, [r1], +r2
sub r10, r4, r9, asr #14
add r11, r5, r8, asr #14
strh r11, [r1, #2]
strh r10, [r1], +r2
sub r10, r12, r6, asr #14
sub r11, lr, r7, asr #14
strh r11, [r1, #2]
strh r10, [r1], +r2
add r10, r4, r9, asr #14
sub r11, r5, r8, asr #14
strh r11, [r1, #2]
strh r10, [r1], #4
@ Inner loop: decrement the upper-halfword counter, repeat while >= 0.
subs r3, r3, #1, 16
bge LBL4
@ Outer loop: skip r1 ahead by 3*r2, rewind the table pointer by 3*r2, and
@ repeat while the lower halfword of r3 is non-zero.
add r12, r2, r2, lsl #1
add r1, r1, r12
sub r0, r0, r12
sub r3, r3, #1
movs lr, r3, lsl #16
bne LBL5
@ End of pass: move the table pointer forward by 3*r2, restore r1/r3,
@ multiply the stride by 4 and run another pass while r3 > 2.
add r0, r0, r12
ldmia sp!, {r1, r3}
mov r2, r2, lsl #2
cmp r3, #2
bgt LBL6
LBL3:
@ Success: return 0 and restore the registers saved on entry.
mov r0, #0
ldmia sp!, {r4 - r11, pc}
@ NOTE(review): the three lines below come after the return and cannot be
@ reached by fall-through; they look like literal-pool words rendered as
@ instructions -- confirm before editing.
andeq r3, r1, r0, lsr #32
andeq r10, r1, r12, ror #31
andeq r3, r1, r8, lsr #32

View File

@ -1517,11 +1517,7 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
int outCFFT;
WebRtc_Word16 fft[PART_LEN4];
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
WebRtc_Word16 postFft[PART_LEN4];
#else
WebRtc_Word16 postFft[PART_LEN2];
#endif
WebRtc_Word16 dfwReal[PART_LEN1];
WebRtc_Word16 dfwImag[PART_LEN1];
WebRtc_Word16 xfwReal[PART_LEN1];
@ -1635,18 +1631,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
// Fourier transformation of near end signal.
// The result is scaled with 1/PART_LEN2, that is, the result is in Q(-6) for PART_LEN = 32
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1);
// The imaginary part has to switch sign
for(i = 1; i < PART_LEN2-1;)
{
postFft[i] = -postFft[i];
i += 2;
postFft[i] = -postFft[i];
i += 2;
}
#else
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
@ -1661,17 +1645,12 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
postFft[i] = -postFft[i];
i += 2;
}
#endif
// Extract imaginary and real part, calculate the magnitude for all frequency bins
dfwImag[0] = 0;
dfwImag[PART_LEN] = 0;
dfwReal[0] = postFft[0];
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
dfwReal[PART_LEN] = postFft[PART_LEN2];
#else
dfwReal[PART_LEN] = fft[PART_LEN2];
#endif
dfaNoisy[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]);
dfaNoisy[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]);
dfaNoisySum = (WebRtc_UWord32)(dfaNoisy[0]);
@ -1758,19 +1737,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
// Fourier transformation of near end signal.
// The result is scaled with 1/PART_LEN2, that is, in Q(-6) for PART_LEN = 32
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1);
// The imaginary part has to switch sign
for(i = 1; i < PART_LEN2-1;)
{
postFft[i] = -postFft[i];
i += 2;
postFft[i] = -postFft[i];
i += 2;
}
#else
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
@ -1785,17 +1751,12 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
postFft[i] = -postFft[i];
i += 2;
}
#endif
// Extract imaginary and real part, calculate the magnitude for all frequency bins
dfwImag[0] = 0;
dfwImag[PART_LEN] = 0;
dfwReal[0] = postFft[0];
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
dfwReal[PART_LEN] = postFft[PART_LEN2];
#else
dfwReal[PART_LEN] = fft[PART_LEN2];
#endif
dfaClean[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]);
dfaClean[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]);
@ -1874,18 +1835,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
}
// Fourier transformation of far end signal.
// The result is scaled with 1/PART_LEN2, that is the result is in Q(-6) for PART_LEN = 32
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1);
// The imaginary part has to switch sign
for(i = 1; i < PART_LEN2-1;)
{
postFft[i] = -postFft[i];
i += 2;
postFft[i] = -postFft[i];
i += 2;
}
#else
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
@ -1900,17 +1849,12 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
postFft[i] = -postFft[i];
i += 2;
}
#endif
// Extract imaginary and real part, calculate the magnitude for all frequency bins
xfwImag[0] = 0;
xfwImag[PART_LEN] = 0;
xfwReal[0] = postFft[0];
#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
xfwReal[PART_LEN] = postFft[PART_LEN2];
#else
xfwReal[PART_LEN] = fft[PART_LEN2];
#endif
xfa[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[0]);
xfa[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[PART_LEN]);
xfaSum = (WebRtc_UWord32)(xfa[0]) + (WebRtc_UWord32)(xfa[PART_LEN]);
@ -2296,7 +2240,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
fft[PART_LEN2] = efwReal[PART_LEN];
fft[PART_LEN2 + 1] = -efwImag[PART_LEN];
#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
// inverse FFT, result should be scaled with outCFFT
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
@ -2307,20 +2250,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons
j = WEBRTC_SPL_LSHIFT_W32(i, 1);
fft[i] = fft[j];
}
#else
outCFFT = WebRtcSpl_ComplexIFFT2(fft, postFft, PART_LEN_SHIFT, 1);
//take only the real values and scale with outCFFT
for(i = 0, j = 0; i < PART_LEN2;)
{
fft[i] = postFft[j];
i += 1;
j += 2;
fft[i] = postFft[j];
i += 1;
j += 2;
}
#endif
for (i = 0; i < PART_LEN; i++)
{