2019-08-19 18:20:29 +08:00
|
|
|
/**********************************************************************
|
|
|
|
Copyright(c) 2019 Arm Corporation All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions
|
|
|
|
are met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in
|
|
|
|
the documentation and/or other materials provided with the
|
|
|
|
distribution.
|
|
|
|
* Neither the name of Arm Corporation nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived
|
|
|
|
from this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
**********************************************************************/
|
2020-11-22 01:51:37 +09:00
|
|
|
|
|
|
|
#include "../include/aarch64_label.h"
|
|
|
|
|
2019-08-19 18:20:29 +08:00
|
|
|
/* Target architecture, output section and alignment for the code below. */
	.arch		armv8-a+crypto
	.text
	.p2align	3		/* 2^3 = 8-byte alignment */
|
|
|
|
|
|
|
|
/*
|
|
|
|
Macros
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * declare_var_vector_reg name, reg
 *
 * Binds four aliases to the SIMD&FP register with number \reg, one per
 * register view:
 *
 *   <name>_s -> s<reg>     <name>_d -> d<reg>
 *   <name>_q -> q<reg>     <name>_v -> v<reg>
 *
 * Emits no instructions; it only creates assembler register aliases.
 */
.macro	declare_var_vector_reg	name:req, reg:req
	\name\()_s	.req	s\reg
	\name\()_d	.req	d\reg
	\name\()_q	.req	q\reg
	\name\()_v	.req	v\reg
.endm
|
|
|
|
|
|
|
|
/*
 * mod_adler dest, tmp
 *
 * In-place reduction of a 32-bit value by multiply-and-shift:
 *
 *   q    = (dest * const_div1) >> 47
 *   dest = dest - q * const_div2
 *
 * dest  32-bit (w) register alias; input and result.
 * tmp   32-bit (w) scratch alias. An alias named <tmp>_x for the 64-bit
 *       view of the same register must exist; both views are clobbered.
 *
 * Reads the register aliases const_div1 and const_div2, which the caller
 * must have loaded beforehand. adler32_neon loads 0x80078071 and 65521,
 * which makes the result dest mod 65521.
 */
.macro	mod_adler	dest:req, tmp:req
	umull	\tmp\()_x, \dest, const_div1		/* full 64-bit product */
	lsr	\tmp\()_x, \tmp\()_x, #47		/* quotient estimate q */
	msub	\dest, \tmp, const_div2, \dest		/* dest -= q * const_div2 */
.endm
|
|
|
|
|
|
|
|
/*
 * uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);
 *
 * Continues an Adler-32 checksum over the bytes [start, start + length).
 *
 * In:   w0 = adler32  low 16 bits seed the byte sum (adler0),
 *                     high 16 bits seed the sum of sums (adler1)
 *       x1 = start    address of the first input byte
 *       x2 = length   number of bytes; all 64 bits of x2 are used
 * Out:  w0 = (adler1 << 16) | adler0, both reduced with mod_adler
 * Clobbers: x1-x8, x10, x11, v2-v7, v16, v17, v20 and the condition flags.
 *           The stack is not touched.
 *
 * NOTE(review): the prototype above declares length as uint32_t, yet the
 * code reads the whole of x2. For a 32-bit argument the procedure call
 * standard does not define the upper half of the register, so confirm
 * that the C declaration really uses a 64-bit length.
 *
 * Structure:
 *   1. Whole 32-byte blocks are summed with NEON, at most
 *      ADLER32_NEON_BLOCKS_PER_PASS blocks between two reductions.
 *   2. The remaining (length & 31) bytes are summed one at a time.
 *   3. Both sums are reduced once more and packed into w0.
 */

/* Modulus loaded into const_div2 for mod_adler. */
#define ADLER32_NEON_BASE		65521
/*
 * Maximum number of 32-byte blocks (173 * 32 = 5536 bytes) accumulated
 * before the sums are reduced. The sums are reduced from their low 32 bits
 * only, so this bound presumably keeps the worst case below 2^32.
 */
#define ADLER32_NEON_BLOCKS_PER_PASS	173

/* Argument registers */
adler32		.req	w0
start		.req	x1
length		.req	x2

	.global	cdecl(adler32_neon)
#ifndef __APPLE__
	.type	adler32_neon, %function
#endif
cdecl(adler32_neon):
	/* SIMD register aliases */
	declare_var_vector_reg	factor0, 6	/* weights for bytes  0..15 of a block */
	declare_var_vector_reg	factor1, 7	/* weights for bytes 16..31 of a block */
	declare_var_vector_reg	d0,      4	/* block bytes  0..15 */
	declare_var_vector_reg	d1,      5	/* block bytes 16..31 */
	declare_var_vector_reg	adacc,   2	/* byte-sum accumulator, 4 x u32 */
	declare_var_vector_reg	s2acc,   3	/* sum-of-sums accumulator, 4 x u32 */
	declare_var_vector_reg	zero,    16	/* all-zero vector */
	declare_var_vector_reg	adler,   17	/* pairwise byte sums, 8 x u16 */
	/*
	 * sum2 and tmp2 deliberately share v20: tmp2 is consumed before
	 * sum2 is written in each loop iteration.
	 */
	declare_var_vector_reg	sum2,    20	/* weighted byte products, 8 x u16 */
	declare_var_vector_reg	tmp2,    20	/* adacc << 5 */

	/* General-purpose register aliases */
adler0		.req	w4		/* running byte sum */
adler1		.req	w5		/* running sum of sums */
adler0_x	.req	x4
adler1_x	.req	x5
end		.req	x0		/* start + length; reuses the adler32 register */
tmp		.req	w8		/* scratch; also the tail byte */
tmp_x		.req	x8		/* scratch; also the end of the current pass */
tmp1_x		.req	x9		/* alias is not referenced in this function */
loop_cnt	.req	x10		/* 32-byte blocks still to process */
loop_const	.req	x11		/* blocks in the current pass */
const_div1	.req	w6		/* multiplier used by mod_adler */
const_div2	.req	w7		/* modulus used by mod_adler */

	/* const_div1 = 0x80078071, const_div2 = 65521 */
	mov	const_div1, 32881
	movk	const_div1, 0x8007, lsl 16
	mov	const_div2, ADLER32_NEON_BASE

	/* Split the incoming checksum into its two 16-bit halves. */
	and	adler0, adler32, 0xffff
	lsr	adler1, adler32, 16

	lsr	loop_cnt, length, 5		/* number of whole 32-byte blocks */

	/* x3 = address of the 32-byte weight table (factors, end of file) */
#ifndef __APPLE__
	adrp	x3, factors
	add	x3, x3, :lo12:factors
#else
	adrp	x3, factors@PAGE
	add	x3, x3, factors@PAGEOFF
#endif
	ld1	{factor0_v.16b-factor1_v.16b}, [x3]

	/* adler32 (w0) is dead from here on, so x0 can hold the end pointer. */
	add	end, start, length
	cbz	loop_cnt, final_accum32		/* fewer than 32 bytes: tail only */
	mov	loop_const, ADLER32_NEON_BLOCKS_PER_PASS

	movi	zero_v.4s, 0

	/*
	 * One pass: loop_const = min(loop_cnt, ADLER32_NEON_BLOCKS_PER_PASS)
	 * blocks. loop_cnt is an unsigned count, hence the unsigned
	 * condition (ls).
	 */
great_than_32:
	cmp	loop_cnt, ADLER32_NEON_BLOCKS_PER_PASS
	csel	loop_const, loop_cnt, loop_const, ls

	/* Seed lane 0 of each accumulator with the scalar sums, rest zero. */
	mov	adacc_v.16b, zero_v.16b
	mov	s2acc_v.16b, zero_v.16b
	ins	adacc_v.s[0], adler0
	ins	s2acc_v.s[0], adler1
	add	tmp_x, start, loop_const, lsl 5	/* end of this pass */

	/* One 32-byte block per iteration. */
accum32_neon:
	/*
	 * The load comes before start is advanced, so the last iteration
	 * never reads beyond the end of the input.
	 */
	ld1	{d0_v.16b-d1_v.16b}, [start]
	add	start, start, 32

	/* s2acc += 32 * adacc, using the byte sum from before this block. */
	shl	tmp2_v.4s, adacc_v.4s, 5
	add	s2acc_v.4s, s2acc_v.4s, tmp2_v.4s

	/* adacc += sum of the 32 bytes (pairwise widening additions). */
	uaddlp	adler_v.8h, d0_v.16b
	uadalp	adler_v.8h, d1_v.16b
	uadalp	adacc_v.4s, adler_v.8h

	/* s2acc += sum of factor[i] * byte[i] over the 32 bytes. */
	umull	sum2_v.8h, factor0_v.8b,  d0_v.8b
	umlal2	sum2_v.8h, factor0_v.16b, d0_v.16b
	umlal	sum2_v.8h, factor1_v.8b,  d1_v.8b
	umlal2	sum2_v.8h, factor1_v.16b, d1_v.16b
	uadalp	s2acc_v.4s, sum2_v.8h

	cmp	start, tmp_x
	bne	accum32_neon

	/* Fold the four lanes of each accumulator back into the scalars. */
	uaddlv	adacc_d, adacc_v.4s
	uaddlv	s2acc_d, s2acc_v.4s
	fmov	adler0_x, adacc_d
	fmov	adler1_x, s2acc_d

	/* Reduce before the next pass; mod_adler reads the low 32 bits. */
	mod_adler	adler0, tmp
	mod_adler	adler1, tmp
	sub	loop_cnt, loop_cnt, loop_const
	cbnz	loop_cnt, great_than_32

	/* Tail: the remaining (length & 31) bytes, one at a time. */
final_accum32:
	and	length, length, 31
	cbz	length, end_func

accum32_body:
	cmp	start, end
	beq	end_func
	ldrb	tmp, [start], 1			/* tmp = *start++ */
	add	adler0, adler0, tmp
	add	adler1, adler1, adler0
	b	accum32_body

	/* Final reduction and packing of the result. */
end_func:
	mod_adler	adler0, tmp
	mod_adler	adler1, tmp
	orr	w0, adler0, adler1, lsl 16
	ret
|
|
|
|
|
2020-11-22 01:51:37 +09:00
|
|
|
#ifndef __APPLE__
	.size	adler32_neon, .-adler32_neon
	/*
	 * The table below is read as one 32-byte unit (a single two-register
	 * ld1 in adler32_neon), so it is placed in a mergeable section whose
	 * entry size is the full 32 bytes. With 16-byte entries the linker
	 * could merge each half independently with identical constants from
	 * other objects, and the two halves would no longer be guaranteed to
	 * be adjacent.
	 */
	.section	.rodata.cst32,"aM",@progbits,32
#else
	.section	__TEXT,__const
#endif
	.align	4
/*
 * Per-byte weights for one 32-byte block. In little-endian memory order
 * the bytes are 32, 31, ..., 2, 1: byte i of the block is weighted 32 - i.
 */
factors:
	.quad	0x191a1b1c1d1e1f20	/* weights 32..25 for bytes  0..7  */
	.quad	0x1112131415161718	/* weights 24..17 for bytes  8..15 */
	.quad	0x090a0b0c0d0e0f10	/* weights 16..9  for bytes 16..23 */
	.quad	0x0102030405060708	/* weights  8..1  for bytes 24..31 */
|