Merge "Make all labels local."
This commit is contained in:
commit
a3a619835d
@ -70,7 +70,7 @@
|
||||
|
||||
.macro m_scan_byte
|
||||
ldrb r3, [r0]
|
||||
cbz r3, strcat_r0_scan_done
|
||||
cbz r3, .L_strcat_r0_scan_done
|
||||
add r0, #1
|
||||
.endm // m_scan_byte
|
||||
|
||||
@ -84,10 +84,10 @@ ENTRY(strcat)
|
||||
// Quick check to see if src is empty.
|
||||
ldrb r2, [r1]
|
||||
pld [r1, #0]
|
||||
cbnz r2, strcat_continue
|
||||
cbnz r2, .L_strcat_continue
|
||||
bx lr
|
||||
|
||||
strcat_continue:
|
||||
.L_strcat_continue:
|
||||
// To speed up really small dst strings, unroll checking the first 4 bytes.
|
||||
m_push
|
||||
m_scan_byte
|
||||
@ -96,95 +96,95 @@ strcat_continue:
|
||||
m_scan_byte
|
||||
|
||||
ands r3, r0, #7
|
||||
beq strcat_mainloop
|
||||
beq .L_strcat_mainloop
|
||||
|
||||
// Align to a double word (64 bits).
|
||||
rsb r3, r3, #8
|
||||
lsls ip, r3, #31
|
||||
beq strcat_align_to_32
|
||||
beq .L_strcat_align_to_32
|
||||
|
||||
ldrb r5, [r0]
|
||||
cbz r5, strcat_r0_scan_done
|
||||
cbz r5, .L_strcat_r0_scan_done
|
||||
add r0, r0, #1
|
||||
|
||||
strcat_align_to_32:
|
||||
bcc strcat_align_to_64
|
||||
.L_strcat_align_to_32:
|
||||
bcc .L_strcat_align_to_64
|
||||
|
||||
ldrb r2, [r0]
|
||||
cbz r2, strcat_r0_scan_done
|
||||
cbz r2, .L_strcat_r0_scan_done
|
||||
add r0, r0, #1
|
||||
ldrb r4, [r0]
|
||||
cbz r4, strcat_r0_scan_done
|
||||
cbz r4, .L_strcat_r0_scan_done
|
||||
add r0, r0, #1
|
||||
|
||||
strcat_align_to_64:
|
||||
.L_strcat_align_to_64:
|
||||
tst r3, #4
|
||||
beq strcat_mainloop
|
||||
beq .L_strcat_mainloop
|
||||
ldr r3, [r0], #4
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcat_zero_in_second_register
|
||||
b strcat_mainloop
|
||||
bne .L_strcat_zero_in_second_register
|
||||
b .L_strcat_mainloop
|
||||
|
||||
strcat_r0_scan_done:
|
||||
.L_strcat_r0_scan_done:
|
||||
// For short copies, hard-code checking the first 8 bytes since this
|
||||
// new code doesn't win until after about 8 bytes.
|
||||
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
|
||||
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
|
||||
m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
|
||||
m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue
|
||||
|
||||
strcpy_finish:
|
||||
.L_strcpy_finish:
|
||||
m_pop
|
||||
|
||||
strcpy_continue:
|
||||
.L_strcpy_continue:
|
||||
ands r3, r0, #7
|
||||
beq strcpy_check_src_align
|
||||
beq .L_strcpy_check_src_align
|
||||
|
||||
// Align to a double word (64 bits).
|
||||
rsb r3, r3, #8
|
||||
lsls ip, r3, #31
|
||||
beq strcpy_align_to_32
|
||||
beq .L_strcpy_align_to_32
|
||||
|
||||
ldrb r2, [r1], #1
|
||||
strb r2, [r0], #1
|
||||
cbz r2, strcpy_complete
|
||||
cbz r2, .L_strcpy_complete
|
||||
|
||||
strcpy_align_to_32:
|
||||
bcc strcpy_align_to_64
|
||||
.L_strcpy_align_to_32:
|
||||
bcc .L_strcpy_align_to_64
|
||||
|
||||
ldrb r2, [r1], #1
|
||||
strb r2, [r0], #1
|
||||
cbz r2, strcpy_complete
|
||||
cbz r2, .L_strcpy_complete
|
||||
ldrb r2, [r1], #1
|
||||
strb r2, [r0], #1
|
||||
cbz r2, strcpy_complete
|
||||
cbz r2, .L_strcpy_complete
|
||||
|
||||
strcpy_align_to_64:
|
||||
.L_strcpy_align_to_64:
|
||||
tst r3, #4
|
||||
beq strcpy_check_src_align
|
||||
beq .L_strcpy_check_src_align
|
||||
ldr r2, [r1], #4
|
||||
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
str r2, [r0], #4
|
||||
|
||||
strcpy_check_src_align:
|
||||
.L_strcpy_check_src_align:
|
||||
// At this point dst is aligned to a double word, check if src
|
||||
// is also aligned to a double word.
|
||||
ands r3, r1, #7
|
||||
bne strcpy_unaligned_copy
|
||||
bne .L_strcpy_unaligned_copy
|
||||
|
||||
.p2align 2
|
||||
strcpy_mainloop:
|
||||
.L_strcpy_mainloop:
|
||||
ldrd r2, r3, [r1], #8
|
||||
|
||||
pld [r1, #64]
|
||||
@ -192,128 +192,128 @@ strcpy_mainloop:
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_second_register
|
||||
bne .L_strcpy_zero_in_second_register
|
||||
|
||||
strd r2, r3, [r0], #8
|
||||
b strcpy_mainloop
|
||||
b .L_strcpy_mainloop
|
||||
|
||||
strcpy_complete:
|
||||
.L_strcpy_complete:
|
||||
m_pop
|
||||
|
||||
strcpy_zero_in_first_register:
|
||||
.L_strcpy_zero_in_first_register:
|
||||
lsls lr, ip, #17
|
||||
bne strcpy_copy1byte
|
||||
bcs strcpy_copy2bytes
|
||||
bne .L_strcpy_copy1byte
|
||||
bcs .L_strcpy_copy2bytes
|
||||
lsls ip, ip, #1
|
||||
bne strcpy_copy3bytes
|
||||
bne .L_strcpy_copy3bytes
|
||||
|
||||
strcpy_copy4bytes:
|
||||
.L_strcpy_copy4bytes:
|
||||
// Copy 4 bytes to the destiniation.
|
||||
str r2, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_copy1byte:
|
||||
.L_strcpy_copy1byte:
|
||||
strb r2, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_copy2bytes:
|
||||
.L_strcpy_copy2bytes:
|
||||
strh r2, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_copy3bytes:
|
||||
.L_strcpy_copy3bytes:
|
||||
strh r2, [r0], #2
|
||||
lsr r2, #16
|
||||
strb r2, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_zero_in_second_register:
|
||||
.L_strcpy_zero_in_second_register:
|
||||
lsls lr, ip, #17
|
||||
bne strcpy_copy5bytes
|
||||
bcs strcpy_copy6bytes
|
||||
bne .L_strcpy_copy5bytes
|
||||
bcs .L_strcpy_copy6bytes
|
||||
lsls ip, ip, #1
|
||||
bne strcpy_copy7bytes
|
||||
bne .L_strcpy_copy7bytes
|
||||
|
||||
// Copy 8 bytes to the destination.
|
||||
strd r2, r3, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_copy5bytes:
|
||||
.L_strcpy_copy5bytes:
|
||||
str r2, [r0], #4
|
||||
strb r3, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_copy6bytes:
|
||||
.L_strcpy_copy6bytes:
|
||||
str r2, [r0], #4
|
||||
strh r3, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_copy7bytes:
|
||||
.L_strcpy_copy7bytes:
|
||||
str r2, [r0], #4
|
||||
strh r3, [r0], #2
|
||||
lsr r3, #16
|
||||
strb r3, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_unaligned_copy:
|
||||
.L_strcpy_unaligned_copy:
|
||||
// Dst is aligned to a double word, while src is at an unknown alignment.
|
||||
// There are 7 different versions of the unaligned copy code
|
||||
// to prevent overreading the src. The mainloop of every single version
|
||||
// will store 64 bits per loop. The difference is how much of src can
|
||||
// be read without potentially crossing a page boundary.
|
||||
tbb [pc, r3]
|
||||
strcpy_unaligned_branchtable:
|
||||
.L_strcpy_unaligned_branchtable:
|
||||
.byte 0
|
||||
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
|
||||
.byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)
|
||||
|
||||
.p2align 2
|
||||
// Can read 7 bytes before possibly crossing a page.
|
||||
strcpy_unalign7:
|
||||
.L_strcpy_unalign7:
|
||||
ldr r2, [r1], #4
|
||||
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
|
||||
ldrb r3, [r1]
|
||||
cbz r3, strcpy_unalign7_copy5bytes
|
||||
cbz r3, .L_strcpy_unalign7_copy5bytes
|
||||
ldrb r4, [r1, #1]
|
||||
cbz r4, strcpy_unalign7_copy6bytes
|
||||
cbz r4, .L_strcpy_unalign7_copy6bytes
|
||||
ldrb r5, [r1, #2]
|
||||
cbz r5, strcpy_unalign7_copy7bytes
|
||||
cbz r5, .L_strcpy_unalign7_copy7bytes
|
||||
|
||||
ldr r3, [r1], #4
|
||||
pld [r1, #64]
|
||||
|
||||
lsrs ip, r3, #24
|
||||
strd r2, r3, [r0], #8
|
||||
beq strcpy_unalign_return
|
||||
b strcpy_unalign7
|
||||
beq .L_strcpy_unalign_return
|
||||
b .L_strcpy_unalign7
|
||||
|
||||
strcpy_unalign7_copy5bytes:
|
||||
.L_strcpy_unalign7_copy5bytes:
|
||||
str r2, [r0], #4
|
||||
strb r3, [r0]
|
||||
strcpy_unalign_return:
|
||||
.L_strcpy_unalign_return:
|
||||
m_pop
|
||||
|
||||
strcpy_unalign7_copy6bytes:
|
||||
.L_strcpy_unalign7_copy6bytes:
|
||||
str r2, [r0], #4
|
||||
strb r3, [r0], #1
|
||||
strb r4, [r0], #1
|
||||
m_pop
|
||||
|
||||
strcpy_unalign7_copy7bytes:
|
||||
.L_strcpy_unalign7_copy7bytes:
|
||||
str r2, [r0], #4
|
||||
strb r3, [r0], #1
|
||||
strb r4, [r0], #1
|
||||
@ -322,41 +322,41 @@ strcpy_unalign7_copy7bytes:
|
||||
|
||||
.p2align 2
|
||||
// Can read 6 bytes before possibly crossing a page.
|
||||
strcpy_unalign6:
|
||||
.L_strcpy_unalign6:
|
||||
ldr r2, [r1], #4
|
||||
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
|
||||
ldrb r4, [r1]
|
||||
cbz r4, strcpy_unalign_copy5bytes
|
||||
cbz r4, .L_strcpy_unalign_copy5bytes
|
||||
ldrb r5, [r1, #1]
|
||||
cbz r5, strcpy_unalign_copy6bytes
|
||||
cbz r5, .L_strcpy_unalign_copy6bytes
|
||||
|
||||
ldr r3, [r1], #4
|
||||
pld [r1, #64]
|
||||
|
||||
tst r3, #0xff0000
|
||||
beq strcpy_copy7bytes
|
||||
beq .L_strcpy_copy7bytes
|
||||
lsrs ip, r3, #24
|
||||
strd r2, r3, [r0], #8
|
||||
beq strcpy_unalign_return
|
||||
b strcpy_unalign6
|
||||
beq .L_strcpy_unalign_return
|
||||
b .L_strcpy_unalign6
|
||||
|
||||
.p2align 2
|
||||
// Can read 5 bytes before possibly crossing a page.
|
||||
strcpy_unalign5:
|
||||
.L_strcpy_unalign5:
|
||||
ldr r2, [r1], #4
|
||||
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
|
||||
ldrb r4, [r1]
|
||||
cbz r4, strcpy_unalign_copy5bytes
|
||||
cbz r4, .L_strcpy_unalign_copy5bytes
|
||||
|
||||
ldr r3, [r1], #4
|
||||
|
||||
@ -365,17 +365,17 @@ strcpy_unalign5:
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_second_register
|
||||
bne .L_strcpy_zero_in_second_register
|
||||
|
||||
strd r2, r3, [r0], #8
|
||||
b strcpy_unalign5
|
||||
b .L_strcpy_unalign5
|
||||
|
||||
strcpy_unalign_copy5bytes:
|
||||
.L_strcpy_unalign_copy5bytes:
|
||||
str r2, [r0], #4
|
||||
strb r4, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_unalign_copy6bytes:
|
||||
.L_strcpy_unalign_copy6bytes:
|
||||
str r2, [r0], #4
|
||||
strb r4, [r0], #1
|
||||
strb r5, [r0]
|
||||
@ -383,13 +383,13 @@ strcpy_unalign_copy6bytes:
|
||||
|
||||
.p2align 2
|
||||
// Can read 4 bytes before possibly crossing a page.
|
||||
strcpy_unalign4:
|
||||
.L_strcpy_unalign4:
|
||||
ldr r2, [r1], #4
|
||||
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
|
||||
ldr r3, [r1], #4
|
||||
pld [r1, #64]
|
||||
@ -397,20 +397,20 @@ strcpy_unalign4:
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_second_register
|
||||
bne .L_strcpy_zero_in_second_register
|
||||
|
||||
strd r2, r3, [r0], #8
|
||||
b strcpy_unalign4
|
||||
b .L_strcpy_unalign4
|
||||
|
||||
.p2align 2
|
||||
// Can read 3 bytes before possibly crossing a page.
|
||||
strcpy_unalign3:
|
||||
.L_strcpy_unalign3:
|
||||
ldrb r2, [r1]
|
||||
cbz r2, strcpy_unalign3_copy1byte
|
||||
cbz r2, .L_strcpy_unalign3_copy1byte
|
||||
ldrb r3, [r1, #1]
|
||||
cbz r3, strcpy_unalign3_copy2bytes
|
||||
cbz r3, .L_strcpy_unalign3_copy2bytes
|
||||
ldrb r4, [r1, #2]
|
||||
cbz r4, strcpy_unalign3_copy3bytes
|
||||
cbz r4, .L_strcpy_unalign3_copy3bytes
|
||||
|
||||
ldr r2, [r1], #4
|
||||
ldr r3, [r1], #4
|
||||
@ -418,26 +418,26 @@ strcpy_unalign3:
|
||||
pld [r1, #64]
|
||||
|
||||
lsrs lr, r2, #24
|
||||
beq strcpy_copy4bytes
|
||||
beq .L_strcpy_copy4bytes
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_second_register
|
||||
bne .L_strcpy_zero_in_second_register
|
||||
|
||||
strd r2, r3, [r0], #8
|
||||
b strcpy_unalign3
|
||||
b .L_strcpy_unalign3
|
||||
|
||||
strcpy_unalign3_copy1byte:
|
||||
.L_strcpy_unalign3_copy1byte:
|
||||
strb r2, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_unalign3_copy2bytes:
|
||||
.L_strcpy_unalign3_copy2bytes:
|
||||
strb r2, [r0], #1
|
||||
strb r3, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_unalign3_copy3bytes:
|
||||
.L_strcpy_unalign3_copy3bytes:
|
||||
strb r2, [r0], #1
|
||||
strb r3, [r0], #1
|
||||
strb r4, [r0]
|
||||
@ -445,34 +445,34 @@ strcpy_unalign3_copy3bytes:
|
||||
|
||||
.p2align 2
|
||||
// Can read 2 bytes before possibly crossing a page.
|
||||
strcpy_unalign2:
|
||||
.L_strcpy_unalign2:
|
||||
ldrb r2, [r1]
|
||||
cbz r2, strcpy_unalign_copy1byte
|
||||
cbz r2, .L_strcpy_unalign_copy1byte
|
||||
ldrb r4, [r1, #1]
|
||||
cbz r4, strcpy_unalign_copy2bytes
|
||||
cbz r4, .L_strcpy_unalign_copy2bytes
|
||||
|
||||
ldr r2, [r1], #4
|
||||
ldr r3, [r1], #4
|
||||
pld [r1, #64]
|
||||
|
||||
tst r2, #0xff0000
|
||||
beq strcpy_copy3bytes
|
||||
beq .L_strcpy_copy3bytes
|
||||
lsrs ip, r2, #24
|
||||
beq strcpy_copy4bytes
|
||||
beq .L_strcpy_copy4bytes
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_second_register
|
||||
bne .L_strcpy_zero_in_second_register
|
||||
|
||||
strd r2, r3, [r0], #8
|
||||
b strcpy_unalign2
|
||||
b .L_strcpy_unalign2
|
||||
|
||||
.p2align 2
|
||||
// Can read 1 byte before possibly crossing a page.
|
||||
strcpy_unalign1:
|
||||
.L_strcpy_unalign1:
|
||||
ldrb r2, [r1]
|
||||
cbz r2, strcpy_unalign_copy1byte
|
||||
cbz r2, .L_strcpy_unalign_copy1byte
|
||||
|
||||
ldr r2, [r1], #4
|
||||
ldr r3, [r1], #4
|
||||
@ -482,27 +482,27 @@ strcpy_unalign1:
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_first_register
|
||||
bne .L_strcpy_zero_in_first_register
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcpy_zero_in_second_register
|
||||
bne .L_strcpy_zero_in_second_register
|
||||
|
||||
strd r2, r3, [r0], #8
|
||||
b strcpy_unalign1
|
||||
b .L_strcpy_unalign1
|
||||
|
||||
strcpy_unalign_copy1byte:
|
||||
.L_strcpy_unalign_copy1byte:
|
||||
strb r2, [r0]
|
||||
m_pop
|
||||
|
||||
strcpy_unalign_copy2bytes:
|
||||
.L_strcpy_unalign_copy2bytes:
|
||||
strb r2, [r0], #1
|
||||
strb r4, [r0]
|
||||
m_pop
|
||||
|
||||
.p2align 2
|
||||
strcat_mainloop:
|
||||
.L_strcat_mainloop:
|
||||
ldrd r2, r3, [r0], #8
|
||||
|
||||
pld [r0, #64]
|
||||
@ -510,59 +510,59 @@ strcat_mainloop:
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcat_zero_in_first_register
|
||||
bne .L_strcat_zero_in_first_register
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne strcat_zero_in_second_register
|
||||
b strcat_mainloop
|
||||
bne .L_strcat_zero_in_second_register
|
||||
b .L_strcat_mainloop
|
||||
|
||||
strcat_zero_in_first_register:
|
||||
.L_strcat_zero_in_first_register:
|
||||
// Prefetch the src now, it's going to be used soon.
|
||||
pld [r1, #0]
|
||||
lsls lr, ip, #17
|
||||
bne strcat_sub8
|
||||
bcs strcat_sub7
|
||||
bne .L_strcat_sub8
|
||||
bcs .L_strcat_sub7
|
||||
lsls ip, ip, #1
|
||||
bne strcat_sub6
|
||||
bne .L_strcat_sub6
|
||||
|
||||
sub r0, r0, #5
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_sub8:
|
||||
.L_strcat_sub8:
|
||||
sub r0, r0, #8
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_sub7:
|
||||
.L_strcat_sub7:
|
||||
sub r0, r0, #7
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_sub6:
|
||||
.L_strcat_sub6:
|
||||
sub r0, r0, #6
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_zero_in_second_register:
|
||||
.L_strcat_zero_in_second_register:
|
||||
// Prefetch the src now, it's going to be used soon.
|
||||
pld [r1, #0]
|
||||
lsls lr, ip, #17
|
||||
bne strcat_sub4
|
||||
bcs strcat_sub3
|
||||
bne .L_strcat_sub4
|
||||
bcs .L_strcat_sub3
|
||||
lsls ip, ip, #1
|
||||
bne strcat_sub2
|
||||
bne .L_strcat_sub2
|
||||
|
||||
sub r0, r0, #1
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_sub4:
|
||||
.L_strcat_sub4:
|
||||
sub r0, r0, #4
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_sub3:
|
||||
.L_strcat_sub3:
|
||||
sub r0, r0, #3
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
|
||||
strcat_sub2:
|
||||
.L_strcat_sub2:
|
||||
sub r0, r0, #2
|
||||
b strcat_r0_scan_done
|
||||
b .L_strcat_r0_scan_done
|
||||
END(strcat)
|
||||
|
@ -65,38 +65,38 @@ ENTRY(strlen)
|
||||
mov r1, r0
|
||||
|
||||
ands r3, r0, #7
|
||||
beq mainloop
|
||||
beq .L_mainloop
|
||||
|
||||
// Align to a double word (64 bits).
|
||||
rsb r3, r3, #8
|
||||
lsls ip, r3, #31
|
||||
beq align_to_32
|
||||
beq .L_align_to_32
|
||||
|
||||
ldrb r2, [r1], #1
|
||||
cbz r2, update_count_and_return
|
||||
cbz r2, .L_update_count_and_return
|
||||
|
||||
align_to_32:
|
||||
bcc align_to_64
|
||||
.L_align_to_32:
|
||||
bcc .L_align_to_64
|
||||
ands ip, r3, #2
|
||||
beq align_to_64
|
||||
beq .L_align_to_64
|
||||
|
||||
ldrb r2, [r1], #1
|
||||
cbz r2, update_count_and_return
|
||||
cbz r2, .L_update_count_and_return
|
||||
ldrb r2, [r1], #1
|
||||
cbz r2, update_count_and_return
|
||||
cbz r2, .L_update_count_and_return
|
||||
|
||||
align_to_64:
|
||||
.L_align_to_64:
|
||||
tst r3, #4
|
||||
beq mainloop
|
||||
beq .L_mainloop
|
||||
ldr r3, [r1], #4
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne zero_in_second_register
|
||||
bne .L_zero_in_second_register
|
||||
|
||||
.p2align 2
|
||||
mainloop:
|
||||
.L_mainloop:
|
||||
ldrd r2, r3, [r1], #8
|
||||
|
||||
pld [r1, #64]
|
||||
@ -104,62 +104,62 @@ mainloop:
|
||||
sub ip, r2, #0x01010101
|
||||
bic ip, ip, r2
|
||||
ands ip, ip, #0x80808080
|
||||
bne zero_in_first_register
|
||||
bne .L_zero_in_first_register
|
||||
|
||||
sub ip, r3, #0x01010101
|
||||
bic ip, ip, r3
|
||||
ands ip, ip, #0x80808080
|
||||
bne zero_in_second_register
|
||||
b mainloop
|
||||
bne .L_zero_in_second_register
|
||||
b .L_mainloop
|
||||
|
||||
update_count_and_return:
|
||||
.L_update_count_and_return:
|
||||
sub r0, r1, r0
|
||||
sub r0, r0, #1
|
||||
bx lr
|
||||
|
||||
zero_in_first_register:
|
||||
.L_zero_in_first_register:
|
||||
sub r0, r1, r0
|
||||
lsls r3, ip, #17
|
||||
bne sub8_and_return
|
||||
bcs sub7_and_return
|
||||
bne .L_sub8_and_return
|
||||
bcs .L_sub7_and_return
|
||||
lsls ip, ip, #1
|
||||
bne sub6_and_return
|
||||
bne .L_sub6_and_return
|
||||
|
||||
sub r0, r0, #5
|
||||
bx lr
|
||||
|
||||
sub8_and_return:
|
||||
.L_sub8_and_return:
|
||||
sub r0, r0, #8
|
||||
bx lr
|
||||
|
||||
sub7_and_return:
|
||||
.L_sub7_and_return:
|
||||
sub r0, r0, #7
|
||||
bx lr
|
||||
|
||||
sub6_and_return:
|
||||
.L_sub6_and_return:
|
||||
sub r0, r0, #6
|
||||
bx lr
|
||||
|
||||
zero_in_second_register:
|
||||
.L_zero_in_second_register:
|
||||
sub r0, r1, r0
|
||||
lsls r3, ip, #17
|
||||
bne sub4_and_return
|
||||
bcs sub3_and_return
|
||||
bne .L_sub4_and_return
|
||||
bcs .L_sub3_and_return
|
||||
lsls ip, ip, #1
|
||||
bne sub2_and_return
|
||||
bne .L_sub2_and_return
|
||||
|
||||
sub r0, r0, #1
|
||||
bx lr
|
||||
|
||||
sub4_and_return:
|
||||
.L_sub4_and_return:
|
||||
sub r0, r0, #4
|
||||
bx lr
|
||||
|
||||
sub3_and_return:
|
||||
.L_sub3_and_return:
|
||||
sub r0, r0, #3
|
||||
bx lr
|
||||
|
||||
sub2_and_return:
|
||||
.L_sub2_and_return:
|
||||
sub r0, r0, #2
|
||||
bx lr
|
||||
END(strlen)
|
||||
|
Loading…
x
Reference in New Issue
Block a user