bionic/libm/x86_64/s_cbrt.S
Jingwei Zhang 5d4f0e6a26 Add the optimized implementation of 18 math functions for x86 and x86_64 respectively
Change-Id: I31bf601448a9427f825517f3a0ff24de47f49bfa
Signed-off-by: Jingwei Zhang <jingwei.zhang@intel.com>
Signed-off-by: Mingwei Shi <mingwei.shi@intel.com>
2015-03-09 13:19:08 -07:00

755 lines
16 KiB
ArmAsm

/*
Copyright (c) 2014, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/******************************************************************************/
// ALGORITHM DESCRIPTION
// ---------------------
//
// Assume x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52, where j = 0,1,2.
// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5],
// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision
// cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5]
// (T stores the high 53 bits, D stores the low order bits)
// Result=2^k*T+(2^k*T*r)*P+2^k*D
// where P=p1+p2*r+..+p8*r^7
//
// Special cases:
// cbrt(NaN) = quiet NaN; the invalid exception is raised only for a signaling NaN
// cbrt(INF) = that INF
// cbrt(+/-0) = +/-0
//
/******************************************************************************/
#include <private/bionic_asm.h>
# -- Begin cbrt
//
// cbrt: cube root of a double-precision value.
//
//   In:      %xmm0 = x
//   Out:     %xmm0 = result
//   Writes:  %rax, %rcx, %rdx, %r8, %r9, %xmm0-%xmm7, flags
//   Stack:   24 bytes are reserved; x is spilled to (%rsp) and re-read by the
//            Inf/NaN path.
//
// Notation used in the comments below:
//   e   = biased 11-bit exponent field of x
//   q   = e / 3 and j = e - 3*q (0, 1 or 2)
//   idx = top five mantissa bits b1..b5
//   m   = 1.b1 b2 ... b52, c = 1.b1 b2 b3 b4 b5 1
//   r   = (m - c) / c
//
ENTRY(cbrt)
# parameter 1: %xmm0
..B1.1:
..___tag_value_cbrt.1:
subq $24, %rsp
..___tag_value_cbrt.3:
// Spill x; the Inf/NaN path re-reads it as two 32-bit words.
movsd %xmm0, (%rsp)
..B1.2:
movq %xmm0, %xmm7
// 524032 = 0x7FF00: mask of the exponent field once x is shifted right by 44.
movl $524032, %edx
movsd EXP_MSK3(%rip), %xmm5
movsd EXP_MSK2(%rip), %xmm3
// Low 20 bits of xmm7 = sign (bit 19) : exponent (bits 18..8) : top 8 mantissa bits.
psrlq $44, %xmm7
pextrw $0, %xmm7, %ecx
movd %xmm7, %eax
movsd EXP_MASK(%rip), %xmm1
movsd SIG_MASK(%rip), %xmm2
// 248 = 0xF8 keeps b1..b5 in bits 7..3, so ecx = 8 * idx, a byte offset into
// a table of 8-byte entries.
andl $248, %ecx
lea rcp_table(%rip), %r8
// xmm4 = rcp_table[idx]
movsd (%rcx,%r8), %xmm4
// r9 = copy of the 20-bit sign:exponent:mantissa-top field
movq %rax, %r9
andl %eax, %edx
// Exponent field all zeros: x is zero or subnormal.
cmpl $0, %edx
je .L_2TAG_PACKET_0.0.1
// Exponent field all ones: x is Inf or NaN.
cmpl $524032, %edx
je .L_2TAG_PACKET_1.0.1
// edx = e, r9 = sign:e (12 bits)
shrl $8, %edx
shrq $8, %r9
// Build xmm3 = -c and xmm1 = -m: the masks supply the sign bit and the
// exponent 0x3FF, x supplies the mantissa bits.
andpd %xmm0, %xmm2
andpd %xmm5, %xmm0
orpd %xmm2, %xmm3
orpd %xmm0, %xmm1
// xmm5 and xmm6 receive the first two coefficient pairs of the polynomial.
movapd coeff_table(%rip), %xmm5
// q is computed as (e * 5462) >> 14; 5462 is 2^14 / 3 rounded up.
movl $5462, %eax
movapd 16+coeff_table(%rip), %xmm6
mull %edx
// mull overwrote edx with the high half of the product; reload sign:e.
movq %r9, %rdx
// r9 = e
andq $2047, %r9
// eax = q
shrl $14, %eax
// edx = sign of x, at bit 11
andl $2048, %edx
// r9 = e - 3*q = j
subq %rax, %r9
subq %rax, %r9
subq %rax, %r9
// r9 = 256 * j: byte offset of the 32-entry sub-table for this j
shlq $8, %r9
// eax = q + 682: biased exponent of 2^k with k = q - 341 (1023 = 3 * 341)
addl $682, %eax
orl %edx, %eax
movd %eax, %xmm7
// rcx = 256 * j + 8 * idx: offset used for both cbrt_table and D_table
addq %r9, %rcx
// xmm7 = 2^k carrying the sign of x
psllq $52, %xmm7
// Common tail, shared by the normal and the subnormal path. On entry:
//   xmm1 = -m, xmm3 = -c, xmm4 = rcp_table[idx], xmm7 = signed 2^k,
//   xmm5/xmm6 = coefficient pairs 0 and 1, rcx = table offset.
.L_2TAG_PACKET_2.0.1:
movapd 32+coeff_table(%rip), %xmm2
movapd 48+coeff_table(%rip), %xmm0
// xmm1 = (-m) - (-c) = c - m
subsd %xmm3, %xmm1
movq %xmm7, %xmm3
lea cbrt_table(%rip), %r8
// xmm7 = 2^k * T
mulsd (%rcx,%r8), %xmm7
// xmm1 = (c - m) * rcp_table[idx] = r (the table entries are negative)
mulsd %xmm4, %xmm1
lea D_table(%rip), %r8
// xmm3 = 2^k * D
mulsd (%rcx,%r8), %xmm3
movapd %xmm1, %xmm4
// Evaluate the degree-7 polynomial P(r) as two interleaved halves, one per
// 64-bit lane, which are combined at the end as high + r^4 * low.
// xmm1 = (r, r)
unpcklpd %xmm1, %xmm1
mulpd %xmm1, %xmm5
mulpd %xmm1, %xmm6
// xmm1 = (r^2, r^2)
mulpd %xmm1, %xmm1
addpd %xmm5, %xmm2
addpd %xmm6, %xmm0
mulpd %xmm1, %xmm2
// xmm1 = (r^4, r^4)
mulpd %xmm1, %xmm1
// xmm4 = 2^k * T * r
mulsd %xmm7, %xmm4
addpd %xmm2, %xmm0
mulsd %xmm0, %xmm1
unpckhpd %xmm0, %xmm0
// xmm0 = P(r)
addsd %xmm1, %xmm0
// Result = (2^k*T*r)*P + 2^k*D + 2^k*T
mulsd %xmm4, %xmm0
addsd %xmm3, %xmm0
addsd %xmm7, %xmm0
jmp ..B1.4
// Zero or subnormal input. Scale x by SCALE63 (2^63) and decode it again.
// xmm1, xmm2, xmm3 and xmm5 still hold the masks loaded at function entry.
.L_2TAG_PACKET_0.0.1:
mulsd SCALE63(%rip), %xmm0
movq %xmm0, %xmm7
movl $524032, %edx
psrlq $44, %xmm7
pextrw $0, %xmm7, %ecx
movd %xmm7, %eax
andl $248, %ecx
lea rcp_table(%rip), %r8
movsd (%rcx,%r8), %xmm4
movq %rax, %r9
andl %eax, %edx
shrl $8, %edx
shrq $8, %r9
// Exponent is still zero after scaling: x was +0 or -0.
cmpl $0, %edx
je .L_2TAG_PACKET_3.0.1
andpd %xmm0, %xmm2
andpd %xmm5, %xmm0
orpd %xmm2, %xmm3
orpd %xmm0, %xmm1
movapd coeff_table(%rip), %xmm5
movl $5462, %eax
movapd 16+coeff_table(%rip), %xmm6
mull %edx
movq %r9, %rdx
andq $2047, %r9
shrl $14, %eax
andl $2048, %edx
subq %rax, %r9
subq %rax, %r9
subq %rax, %r9
shlq $8, %r9
// 661 = 682 - 21: removes the cube root of the 2^63 = (2^21)^3 scaling.
addl $661, %eax
orl %edx, %eax
movd %eax, %xmm7
addq %r9, %rcx
psllq $52, %xmm7
jmp .L_2TAG_PACKET_2.0.1
// x is a zero. r9 holds sign:e with e = 0, so it is nonzero only for -0.
.L_2TAG_PACKET_3.0.1:
cmpq $0, %r9
jne .L_2TAG_PACKET_4.0.1
// Return +0.
xorpd %xmm0, %xmm0
jmp ..B1.4
// Return -0.
.L_2TAG_PACKET_4.0.1:
movsd ZERON(%rip), %xmm0
jmp ..B1.4
// Exponent field all ones: tell Inf from NaN using the copy of x on the stack.
.L_2TAG_PACKET_1.0.1:
// eax = high 32 bits of x, edx = low 32 bits of x
movl 4(%rsp), %eax
movl (%rsp), %edx
movl %eax, %ecx
// Drop the sign bit. 2146435072 = 0x7FF00000.
andl $2147483647, %ecx
// Any mantissa bit set in the high word: NaN.
cmpl $2146435072, %ecx
ja .L_2TAG_PACKET_5.0.1
// Any mantissa bit set in the low word: NaN.
cmpl $0, %edx
jne .L_2TAG_PACKET_5.0.1
// High word equal to 0x7FF00000: +Inf, otherwise -Inf.
cmpl $2146435072, %eax
jne .L_2TAG_PACKET_6.0.1
movsd INF(%rip), %xmm0
jmp ..B1.4
.L_2TAG_PACKET_6.0.1:
movsd NEG_INF(%rip), %xmm0
jmp ..B1.4
// NaN input: x + x produces the quiet NaN that is returned.
.L_2TAG_PACKET_5.0.1:
movsd (%rsp), %xmm0
addsd %xmm0, %xmm0
// The result is also written to 8(%rsp); nothing in this function reads it back.
movq %xmm0, 8(%rsp)
.L_2TAG_PACKET_7.0.1:
// Common exit: release the 24-byte frame and return with the result in xmm0.
..B1.4:
addq $24, %rsp
..___tag_value_cbrt.4:
ret
..___tag_value_cbrt.5:
END(cbrt)
# -- End cbrt
.section .rodata, "a"
.align 16
.align 16
// coeff_table: eight doubles c0..c7, each stored as low word then high word.
// cbrt loads them in pairs with movapd, which needs the 16-byte alignment
// requested above. With r the reduced argument, cbrt evaluates
//   P = c7 + c3*r + c5*r^2 + c1*r^3 + c6*r^4 + c2*r^5 + c4*r^6 + c0*r^7
coeff_table:
// c0, c1
.long 1553778919
.long 3213899486
.long 3534952507
.long 3215266280
// c2, c3
.long 1646371399
.long 3214412045
.long 477218588
.long 3216798151
// c4, c5
.long 3582521621
.long 1066628362
.long 1007461464
.long 1068473053
// c6, c7 (c7 = 0x3FD5555555555555, the double nearest 1/3)
.long 889629714
.long 1067378449
.long 1431655765
.long 1070945621
.type coeff_table,@object
.size coeff_table,64
.align 4
// EXP_MSK3 = 0x000FFFFFFFFFFFFF: selects the 52 mantissa bits of a double.
// cbrt ANDs it with x before ORing in EXP_MASK.
EXP_MSK3:
.long 4294967295
.long 1048575
.type EXP_MSK3,@object
.size EXP_MSK3,8
.align 4
// EXP_MSK2 = 0xBFF0400000000000: sign bit, exponent 0x3FF and the sixth
// mantissa bit. ORed with (x & SIG_MASK) in cbrt it yields -(1.b1 b2 b3 b4 b5 1).
EXP_MSK2:
.long 0
.long 3220193280
.type EXP_MSK2,@object
.size EXP_MSK2,8
.align 4
// EXP_MASK = 0xBFF0000000000000 (-1.0): sign bit and exponent 0x3FF.
// ORed with the mantissa of x in cbrt it yields -(1.b1 b2 ... b52).
EXP_MASK:
.long 0
.long 3220176896
.type EXP_MASK,@object
.size EXP_MASK,8
.align 4
// SIG_MASK = 0x000FC00000000000: selects the top six mantissa bits of a double.
// cbrt ORs the selected bits into EXP_MSK2, which already has the sixth bit set.
SIG_MASK:
.long 0
.long 1032192
.type SIG_MASK,@object
.size SIG_MASK,8
.align 4
// rcp_table: 32 doubles (low word, then high word), indexed by the top five
// mantissa bits b1..b5 of the argument (byte offset 8 * index).
// Every entry has its sign bit set: entry i holds -1/(1.b1 b2 b3 b4 b5 1),
// e.g. the first entry is 0xBFEF81F81F81F820 = -64/65.
rcp_table:
.long 528611360
.long 3220144632
.long 2884679527
.long 3220082993
.long 1991868891
.long 3220024928
.long 2298714891
.long 3219970134
.long 58835168
.long 3219918343
.long 3035110223
.long 3219869313
.long 1617585086
.long 3219822831
.long 2500867033
.long 3219778702
.long 4241943008
.long 3219736752
.long 258732970
.long 3219696825
.long 404232216
.long 3219658776
.long 2172167368
.long 3219622476
.long 1544257904
.long 3219587808
.long 377579543
.long 3219554664
.long 1616385542
.long 3219522945
.long 813783277
.long 3219492562
.long 3940743189
.long 3219463431
.long 2689777499
.long 3219435478
.long 1700977147
.long 3219408632
.long 3169102082
.long 3219382828
.long 327235604
.long 3219358008
.long 1244336319
.long 3219334115
.long 1300311200
.long 3219311099
.long 3095471925
.long 3219288912
.long 2166487928
.long 3219267511
.long 2913108253
.long 3219246854
.long 293672978
.long 3219226904
.long 288737297
.long 3219207624
.long 1810275472
.long 3219188981
.long 174592167
.long 3219170945
.long 3539053052
.long 3219153485
.long 2164392968
.long 3219136576
.type rcp_table,@object
.size rcp_table,256
.align 4
// cbrt_table: 96 doubles (low word, then high word) holding the high part T
// of cbrt(2^j * 1.b1 b2 b3 b4 b5 1). It is laid out as three consecutive
// 32-entry sub-tables for j = 0, 1, 2; cbrt addresses it with byte offset
// 256 * j + 8 * index, where index is the five bits b1..b5.
cbrt_table:
// j = 0
.long 572345495
.long 1072698681
.long 1998204467
.long 1072709382
.long 3861501553
.long 1072719872
.long 2268192434
.long 1072730162
.long 2981979308
.long 1072740260
.long 270859143
.long 1072750176
.long 2958651392
.long 1072759916
.long 313113243
.long 1072769490
.long 919449400
.long 1072778903
.long 2809328903
.long 1072788162
.long 2222981587
.long 1072797274
.long 2352530781
.long 1072806244
.long 594152517
.long 1072815078
.long 1555767199
.long 1072823780
.long 4282421314
.long 1072832355
.long 2355578597
.long 1072840809
.long 1162590619
.long 1072849145
.long 797864051
.long 1072857367
.long 431273680
.long 1072865479
.long 2669831148
.long 1072873484
.long 733477752
.long 1072881387
.long 4280220604
.long 1072889189
.long 801961634
.long 1072896896
.long 2915370760
.long 1072904508
.long 1159613482
.long 1072912030
.long 2689944798
.long 1072919463
.long 1248687822
.long 1072926811
.long 2967951030
.long 1072934075
.long 630170432
.long 1072941259
.long 3760898254
.long 1072948363
.long 0
.long 1072955392
.long 2370273294
.long 1072962345
// j = 1
.long 1261754802
.long 1072972640
.long 546334065
.long 1072986123
.long 1054893830
.long 1072999340
.long 1571187597
.long 1073012304
.long 1107975175
.long 1073025027
.long 3606909377
.long 1073037519
.long 1113616747
.long 1073049792
.long 4154744632
.long 1073061853
.long 3358931423
.long 1073073713
.long 4060702372
.long 1073085379
.long 747576176
.long 1073096860
.long 3023138255
.long 1073108161
.long 1419988548
.long 1073119291
.long 1914185305
.long 1073130255
.long 294389948
.long 1073141060
.long 3761802570
.long 1073151710
.long 978281566
.long 1073162213
.long 823148820
.long 1073172572
.long 2420954441
.long 1073182792
.long 3815449908
.long 1073192878
.long 2046058587
.long 1073202835
.long 1807524753
.long 1073212666
.long 2628681401
.long 1073222375
.long 3225667357
.long 1073231966
.long 1555307421
.long 1073241443
.long 3454043099
.long 1073250808
.long 1208137896
.long 1073260066
.long 3659916772
.long 1073269218
.long 1886261264
.long 1073278269
.long 3593647839
.long 1073287220
.long 3086012205
.long 1073296075
.long 2769796922
.long 1073304836
// j = 2
.long 888716057
.long 1073317807
.long 2201465623
.long 1073334794
.long 164369365
.long 1073351447
.long 3462666733
.long 1073367780
.long 2773905457
.long 1073383810
.long 1342879088
.long 1073399550
.long 2543933975
.long 1073415012
.long 1684477781
.long 1073430209
.long 3532178543
.long 1073445151
.long 1147747300
.long 1073459850
.long 1928031793
.long 1073474314
.long 2079717015
.long 1073488553
.long 4016765315
.long 1073502575
.long 3670431139
.long 1073516389
.long 3549227225
.long 1073530002
.long 11637607
.long 1073543422
.long 588220169
.long 1073556654
.long 2635407503
.long 1073569705
.long 2042029317
.long 1073582582
.long 1925128962
.long 1073595290
.long 4136375664
.long 1073607834
.long 759964600
.long 1073620221
.long 4257606771
.long 1073632453
.long 297278907
.long 1073644538
.long 3655053093
.long 1073656477
.long 2442253172
.long 1073668277
.long 1111876799
.long 1073679941
.long 3330973139
.long 1073691472
.long 3438879452
.long 1073702875
.long 3671565478
.long 1073714153
.long 1317849547
.long 1073725310
.long 1642364115
.long 1073736348
.type cbrt_table,@object
.size cbrt_table,768
.align 4
// D_table: 96 doubles (low word, then high word) holding the low-order part D
// that complements the matching cbrt_table entry. Same layout and addressing
// as cbrt_table: three 32-entry sub-tables for j = 0, 1, 2, byte offset
// 256 * j + 8 * index.
D_table:
// j = 0
.long 4050900474
.long 1014427190
.long 1157977860
.long 1016444461
.long 1374568199
.long 1017271387
.long 2809163288
.long 1016882676
.long 3742377377
.long 1013168191
.long 3101606597
.long 1017541672
.long 65224358
.long 1017217597
.long 2691591250
.long 1017266643
.long 4020758549
.long 1017689313
.long 1316310992
.long 1018030788
.long 1031537856
.long 1014090882
.long 3261395239
.long 1016413641
.long 886424999
.long 1016313335
.long 3114776834
.long 1014195875
.long 1681120620
.long 1017825416
.long 1329600273
.long 1016625740
.long 465474623
.long 1017097119
.long 4251633980
.long 1017169077
.long 1986990133
.long 1017710645
.long 752958613
.long 1017159641
.long 2216216792
.long 1018020163
.long 4282860129
.long 1015924861
.long 1557627859
.long 1016039538
.long 3889219754
.long 1018086237
.long 3684996408
.long 1017353275
.long 723532103
.long 1017717141
.long 2951149676
.long 1012528470
.long 831890937
.long 1017830553
.long 1031212645
.long 1017387331
.long 2741737450
.long 1017604974
.long 2863311531
.long 1003776682
.long 4276736099
.long 1013153088
// j = 1
.long 4111778382
.long 1015673686
.long 1728065769
.long 1016413986
.long 2708718031
.long 1018078833
.long 1069335005
.long 1015291224
.long 700037144
.long 1016482032
.long 2904566452
.long 1017226861
.long 4074156649
.long 1017622651
.long 25019565
.long 1015245366
.long 3601952608
.long 1015771755
.long 3267129373
.long 1017904664
.long 503203103
.long 1014921629
.long 2122011730
.long 1018027866
.long 3927295461
.long 1014189456
.long 2790625147
.long 1016024251
.long 1330460186
.long 1016940346
.long 4033568463
.long 1015538390
.long 3695818227
.long 1017509621
.long 257573361
.long 1017208868
.long 3227697852
.long 1017337964
.long 234118548
.long 1017169577
.long 4009025803
.long 1017278524
.long 1948343394
.long 1017749310
.long 678398162
.long 1018144239
.long 3083864863
.long 1016669086
.long 2415453452
.long 1017890370
.long 175467344
.long 1017330033
.long 3197359580
.long 1010339928
.long 2071276951
.long 1015941358
.long 268372543
.long 1016737773
.long 938132959
.long 1017389108
.long 1816750559
.long 1017337448
.long 4119203749
.long 1017152174
// j = 2
.long 2578653878
.long 1013108497
.long 2470331096
.long 1014678606
.long 123855735
.long 1016553320
.long 1265650889
.long 1014782687
.long 3414398172
.long 1017182638
.long 1040773369
.long 1016158401
.long 3483628886
.long 1016886550
.long 4140499405
.long 1016191425
.long 3893477850
.long 1016964495
.long 3935319771
.long 1009634717
.long 2978982660
.long 1015027112
.long 2452709923
.long 1017990229
.long 3190365712
.long 1015835149
.long 4237588139
.long 1015832925
.long 2610678389
.long 1017962711
.long 2127316774
.long 1017405770
.long 824267502
.long 1017959463
.long 2165924042
.long 1017912225
.long 2774007076
.long 1013257418
.long 4123916326
.long 1017582284
.long 1976417958
.long 1016959909
.long 4092806412
.long 1017711279
.long 119251817
.long 1015363631
.long 3475418768
.long 1017675415
.long 1972580503
.long 1015470684
.long 815541017
.long 1017517969
.long 2429917451
.long 1017397776
.long 4062888482
.long 1016749897
.long 68284153
.long 1017925678
.long 2207779246
.long 1016320298
.long 1183466520
.long 1017408657
.long 143326427
.long 1017060403
.type D_table,@object
.size D_table,768
.align 4
// SCALE63 = 0x43E0000000000000 = 2^63. cbrt multiplies zero and subnormal
// inputs by it before decoding the exponent a second time.
SCALE63:
.long 0
.long 1138753536
.type SCALE63,@object
.size SCALE63,8
.align 4
// ZERON = 0x8000000000000000 = -0.0, returned by cbrt for a negative zero input.
ZERON:
.long 0
.long 2147483648
.type ZERON,@object
.size ZERON,8
.align 4
// INF = 0x7FF0000000000000 = +Infinity, returned by cbrt for a +Infinity input.
INF:
.long 0
.long 2146435072
.type INF,@object
.size INF,8
.align 4
// NEG_INF = 0xFFF0000000000000 = -Infinity, returned by cbrt for a -Infinity input.
NEG_INF:
.long 0
.long 4293918720
.type NEG_INF,@object
.size NEG_INF,8
.data
.section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded call-frame information for cbrt: one CIE followed by one FDE.
// Multi-byte values are emitted little-endian, so the bytes of each constant
// read from its least significant end.
.section .eh_frame,"a",@progbits
.eh_frame_seg:
.align 1
// CIE length: 0x14 bytes follow
.4byte 0x00000014
// CIE id 0, version 1, augmentation string "zR"
.8byte 0x00527a0100000000
// code alignment 1, data alignment -8, return-address column 16,
// augmentation data length 1 with value 0x1b (pc-relative sdata4 pointers),
// then DW_CFA_def_cfa r7, offset 8
.8byte 0x08070c1b01107801
// DW_CFA_offset r16 at CFA-8, then two DW_CFA_nop padding bytes
.4byte 0x00000190
// FDE length: 0x1c bytes follow
.4byte 0x0000001c
// Offset back to the CIE
.4byte 0x0000001c
// Start address of cbrt, relative to this field
.4byte ..___tag_value_cbrt.1-.
// Size in bytes of the code range covered
.4byte ..___tag_value_cbrt.5-..___tag_value_cbrt.1
// Augmentation data length 0, then DW_CFA_advance_loc4 ...
.2byte 0x0400
// ... to the instruction after "subq $24, %rsp"
.4byte ..___tag_value_cbrt.3-..___tag_value_cbrt.1
// DW_CFA_def_cfa_offset 32 (return address plus the 24-byte frame)
.2byte 0x200e
// DW_CFA_advance_loc4 ...
.byte 0x04
// ... to the instruction after "addq $24, %rsp"
.4byte ..___tag_value_cbrt.4-..___tag_value_cbrt.3
// DW_CFA_def_cfa_offset 8
.2byte 0x080e
// DW_CFA_nop padding
.byte 0x00
# End