Merge "Make all labels local."

This commit is contained in:
Christopher Ferris 2015-07-24 00:47:06 +00:00 committed by Gerrit Code Review
commit a3a619835d
2 changed files with 175 additions and 175 deletions

View File

@ -70,7 +70,7 @@
.macro m_scan_byte
ldrb r3, [r0]
cbz r3, strcat_r0_scan_done
cbz r3, .L_strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
@ -84,10 +84,10 @@ ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
cbnz r2, strcat_continue
cbnz r2, .L_strcat_continue
bx lr
strcat_continue:
.L_strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
@ -96,95 +96,95 @@ strcat_continue:
m_scan_byte
ands r3, r0, #7
beq strcat_mainloop
beq .L_strcat_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcat_align_to_32
beq .L_strcat_align_to_32
ldrb r5, [r0]
cbz r5, strcat_r0_scan_done
cbz r5, .L_strcat_r0_scan_done
add r0, r0, #1
strcat_align_to_32:
bcc strcat_align_to_64
.L_strcat_align_to_32:
bcc .L_strcat_align_to_64
ldrb r2, [r0]
cbz r2, strcat_r0_scan_done
cbz r2, .L_strcat_r0_scan_done
add r0, r0, #1
ldrb r4, [r0]
cbz r4, strcat_r0_scan_done
cbz r4, .L_strcat_r0_scan_done
add r0, r0, #1
strcat_align_to_64:
.L_strcat_align_to_64:
tst r3, #4
beq strcat_mainloop
beq .L_strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
bne .L_strcat_zero_in_second_register
b .L_strcat_mainloop
strcat_r0_scan_done:
.L_strcat_r0_scan_done:
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue
strcpy_finish:
.L_strcpy_finish:
m_pop
strcpy_continue:
.L_strcpy_continue:
ands r3, r0, #7
beq strcpy_check_src_align
beq .L_strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
beq .L_strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
cbz r2, .L_strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
.L_strcpy_align_to_32:
bcc .L_strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
cbz r2, .L_strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
cbz r2, .L_strcpy_complete
strcpy_align_to_64:
.L_strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
beq .L_strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
str r2, [r0], #4
strcpy_check_src_align:
.L_strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
bne .L_strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
.L_strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
@ -192,128 +192,128 @@ strcpy_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_mainloop
b .L_strcpy_mainloop
strcpy_complete:
.L_strcpy_complete:
m_pop
strcpy_zero_in_first_register:
.L_strcpy_zero_in_first_register:
lsls lr, ip, #17
bne strcpy_copy1byte
bcs strcpy_copy2bytes
bne .L_strcpy_copy1byte
bcs .L_strcpy_copy2bytes
lsls ip, ip, #1
bne strcpy_copy3bytes
bne .L_strcpy_copy3bytes
strcpy_copy4bytes:
.L_strcpy_copy4bytes:
// Copy 4 bytes to the destination.
str r2, [r0]
m_pop
strcpy_copy1byte:
.L_strcpy_copy1byte:
strb r2, [r0]
m_pop
strcpy_copy2bytes:
.L_strcpy_copy2bytes:
strh r2, [r0]
m_pop
strcpy_copy3bytes:
.L_strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
strcpy_zero_in_second_register:
.L_strcpy_zero_in_second_register:
lsls lr, ip, #17
bne strcpy_copy5bytes
bcs strcpy_copy6bytes
bne .L_strcpy_copy5bytes
bcs .L_strcpy_copy6bytes
lsls ip, ip, #1
bne strcpy_copy7bytes
bne .L_strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
strcpy_copy5bytes:
.L_strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
strcpy_copy6bytes:
.L_strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
strcpy_copy7bytes:
.L_strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
strcpy_unaligned_copy:
.L_strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.L_strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
.byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
.L_strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
cbz r3, .L_strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
cbz r4, .L_strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
cbz r5, .L_strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign7
beq .L_strcpy_unalign_return
b .L_strcpy_unalign7
strcpy_unalign7_copy5bytes:
.L_strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
strcpy_unalign_return:
.L_strcpy_unalign_return:
m_pop
strcpy_unalign7_copy6bytes:
.L_strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
strcpy_unalign7_copy7bytes:
.L_strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
@ -322,41 +322,41 @@ strcpy_unalign7_copy7bytes:
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
.L_strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
cbz r4, .L_strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
cbz r5, .L_strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_copy7bytes
beq .L_strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign6
beq .L_strcpy_unalign_return
b .L_strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
.L_strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
cbz r4, .L_strcpy_unalign_copy5bytes
ldr r3, [r1], #4
@ -365,17 +365,17 @@ strcpy_unalign5:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign5
b .L_strcpy_unalign5
strcpy_unalign_copy5bytes:
.L_strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
strcpy_unalign_copy6bytes:
.L_strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
@ -383,13 +383,13 @@ strcpy_unalign_copy6bytes:
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
.L_strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
@ -397,20 +397,20 @@ strcpy_unalign4:
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign4
b .L_strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
.L_strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
cbz r2, .L_strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
cbz r3, .L_strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
cbz r4, .L_strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
@ -418,26 +418,26 @@ strcpy_unalign3:
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_copy4bytes
beq .L_strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign3
b .L_strcpy_unalign3
strcpy_unalign3_copy1byte:
.L_strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign3_copy2bytes:
.L_strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
strcpy_unalign3_copy3bytes:
.L_strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
@ -445,34 +445,34 @@ strcpy_unalign3_copy3bytes:
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
.L_strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
cbz r2, .L_strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign_copy2bytes
cbz r4, .L_strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_copy3bytes
beq .L_strcpy_copy3bytes
lsrs ip, r2, #24
beq strcpy_copy4bytes
beq .L_strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign2
b .L_strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
.L_strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
cbz r2, .L_strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
@ -482,27 +482,27 @@ strcpy_unalign1:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
bne .L_strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
bne .L_strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign1
b .L_strcpy_unalign1
strcpy_unalign_copy1byte:
.L_strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign_copy2bytes:
.L_strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
strcat_mainloop:
.L_strcat_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
@ -510,59 +510,59 @@ strcat_mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcat_zero_in_first_register
bne .L_strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
bne .L_strcat_zero_in_second_register
b .L_strcat_mainloop
strcat_zero_in_first_register:
.L_strcat_zero_in_first_register:
// Prefetch the src now, it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
bne strcat_sub8
bcs strcat_sub7
bne .L_strcat_sub8
bcs .L_strcat_sub7
lsls ip, ip, #1
bne strcat_sub6
bne .L_strcat_sub6
sub r0, r0, #5
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_sub8:
.L_strcat_sub8:
sub r0, r0, #8
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_sub7:
.L_strcat_sub7:
sub r0, r0, #7
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_sub6:
.L_strcat_sub6:
sub r0, r0, #6
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_zero_in_second_register:
.L_strcat_zero_in_second_register:
// Prefetch the src now, it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
bne strcat_sub4
bcs strcat_sub3
bne .L_strcat_sub4
bcs .L_strcat_sub3
lsls ip, ip, #1
bne strcat_sub2
bne .L_strcat_sub2
sub r0, r0, #1
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_sub4:
.L_strcat_sub4:
sub r0, r0, #4
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_sub3:
.L_strcat_sub3:
sub r0, r0, #3
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
strcat_sub2:
.L_strcat_sub2:
sub r0, r0, #2
b strcat_r0_scan_done
b .L_strcat_r0_scan_done
END(strcat)

View File

@ -65,38 +65,38 @@ ENTRY(strlen)
mov r1, r0
ands r3, r0, #7
beq mainloop
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq align_to_32
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, update_count_and_return
cbz r2, .L_update_count_and_return
align_to_32:
bcc align_to_64
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq align_to_64
beq .L_align_to_64
ldrb r2, [r1], #1
cbz r2, update_count_and_return
cbz r2, .L_update_count_and_return
ldrb r2, [r1], #1
cbz r2, update_count_and_return
cbz r2, .L_update_count_and_return
align_to_64:
.L_align_to_64:
tst r3, #4
beq mainloop
beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
bne .L_zero_in_second_register
.p2align 2
mainloop:
.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
@ -104,62 +104,62 @@ mainloop:
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_first_register
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
bne .L_zero_in_second_register
b .L_mainloop
update_count_and_return:
.L_update_count_and_return:
sub r0, r1, r0
sub r0, r0, #1
bx lr
zero_in_first_register:
.L_zero_in_first_register:
sub r0, r1, r0
lsls r3, ip, #17
bne sub8_and_return
bcs sub7_and_return
bne .L_sub8_and_return
bcs .L_sub7_and_return
lsls ip, ip, #1
bne sub6_and_return
bne .L_sub6_and_return
sub r0, r0, #5
bx lr
sub8_and_return:
.L_sub8_and_return:
sub r0, r0, #8
bx lr
sub7_and_return:
.L_sub7_and_return:
sub r0, r0, #7
bx lr
sub6_and_return:
.L_sub6_and_return:
sub r0, r0, #6
bx lr
zero_in_second_register:
.L_zero_in_second_register:
sub r0, r1, r0
lsls r3, ip, #17
bne sub4_and_return
bcs sub3_and_return
bne .L_sub4_and_return
bcs .L_sub3_and_return
lsls ip, ip, #1
bne sub2_and_return
bne .L_sub2_and_return
sub r0, r0, #1
bx lr
sub4_and_return:
.L_sub4_and_return:
sub r0, r0, #4
bx lr
sub3_and_return:
.L_sub3_and_return:
sub r0, r0, #3
bx lr
sub2_and_return:
.L_sub2_and_return:
sub r0, r0, #2
bx lr
END(strlen)