mirror of
https://github.com/intel/isa-l.git
synced 2025-10-28 11:31:51 +01:00
erasure_code: optimize RVV implementation
The ISA-L EC code has been written using RVV vector instructions and the minimum multiplication table, resulting in a performance improvement of over 10 times compared to the existing implementation. Signed-off-by: Shuo Lv <lv.shuo@sanechips.com.cn>
This commit is contained in:
@@ -152,7 +152,7 @@ v2.32
|
|||||||
- Added new RVV xor_gen, pq_gen implementations.
|
- Added new RVV xor_gen, pq_gen implementations.
|
||||||
|
|
||||||
* Erasure coding improvements:
|
* Erasure coding improvements:
|
||||||
- Added new RVV ec_encode_data, gf_vect_dot_prod, gf_vect_mul implementations.
|
- Added new RVV ec_encode_data, ec_encode_data_update, gf_vect_mad, gf_vect_dot_prod, gf_vect_mul implementations.
|
||||||
|
|
||||||
* Zero-memory detection improvements:
|
* Zero-memory detection improvements:
|
||||||
- Added new RVV implementations.
|
- Added new RVV implementations.
|
||||||
|
|||||||
@@ -67,8 +67,13 @@ case "${CPU}" in
|
|||||||
])],
|
])],
|
||||||
[AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
|
[AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
|
||||||
AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes],
|
AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes],
|
||||||
[AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
|
[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
|
||||||
|
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
|
||||||
)
|
)
|
||||||
|
# When the RVV probe succeeded, build C and assembly sources for rv64gcv.
# Plain assignment is used because the += operator is a bash extension and
# configure may be run by a strictly POSIX shell.
if test "x$rvv" = "xyes"; then
CFLAGS="$CFLAGS -march=rv64gcv"
CCASFLAGS="$CCASFLAGS -march=rv64gcv"
fi
|
||||||
AC_MSG_RESULT([$rvv])
|
AC_MSG_RESULT([$rvv])
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
|||||||
@@ -50,6 +50,9 @@
|
|||||||
#define EFENCE_TEST_MIN_SIZE 16
|
#define EFENCE_TEST_MIN_SIZE 16
|
||||||
#define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100
|
#define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100
|
||||||
|
|
||||||
|
#if HAVE_RVV
|
||||||
|
#define EC_ALIGNED_ADDR
|
||||||
|
#endif
|
||||||
#ifdef EC_ALIGNED_ADDR
|
#ifdef EC_ALIGNED_ADDR
|
||||||
// Define power of 2 range to check ptr, len alignment
|
// Define power of 2 range to check ptr, len alignment
|
||||||
#define PTR_ALIGN_CHK_B 0
|
#define PTR_ALIGN_CHK_B 0
|
||||||
|
|||||||
@@ -35,8 +35,13 @@
|
|||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
#ifndef ALIGN_SIZE
|
#ifndef ALIGN_SIZE
|
||||||
|
#if HAVE_RVV
|
||||||
|
#define EC_ALIGNED_ADDR
|
||||||
|
#define ALIGN_SIZE 32
|
||||||
|
#else
|
||||||
#define ALIGN_SIZE 16
|
#define ALIGN_SIZE 16
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// By default, test multibinary version
|
// By default, test multibinary version
|
||||||
#ifndef FUNCTION_UNDER_TEST
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
|||||||
@@ -1,3 +1,31 @@
|
|||||||
|
#########################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
########################################################################
|
########################################################################
|
||||||
# Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
# Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||||
#
|
#
|
||||||
@@ -28,8 +56,20 @@
|
|||||||
########################################################################
|
########################################################################
|
||||||
|
|
||||||
lsrc_riscv64 += \
|
lsrc_riscv64 += \
|
||||||
erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c \
|
erasure_code/riscv64/ec_riscv64_dispatcher.c \
|
||||||
erasure_code/riscv64/ec_multibinary_riscv64.S \
|
erasure_code/riscv64/ec_multibinary_riscv64.S \
|
||||||
erasure_code/riscv64/ec_gf_vect_mul_rvv.S \
|
erasure_code/riscv64/ec_riscv64_highlevel_func.c \
|
||||||
erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S \
|
erasure_code/riscv64/gf_vect_dot_prod_rvv.S \
|
||||||
erasure_code/riscv64/ec_encode_data_rvv.S
|
erasure_code/riscv64/gf_2vect_dot_prod_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_3vect_dot_prod_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_4vect_dot_prod_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_5vect_dot_prod_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_6vect_dot_prod_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_7vect_dot_prod_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_vect_mad_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_2vect_mad_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_3vect_mad_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_4vect_mad_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_5vect_mad_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_6vect_mad_rvv.S \
|
||||||
|
erasure_code/riscv64/gf_vect_mul_rvv.S
|
||||||
|
|||||||
@@ -1,154 +0,0 @@
|
|||||||
/**********************************************************************
|
|
||||||
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of ISCAS nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived
|
|
||||||
from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
**********************************************************************/
|
|
||||||
#if HAVE_RVV
|
|
||||||
#include "ec_table.S"
|
|
||||||
.option arch, +v
|
|
||||||
/*
 * void ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v,
 *                         unsigned char **src, unsigned char **dest)
 *
 * In:    a0 = len, a1 = srcs, a2 = dests, a3 = v, a4 = src, a5 = dest
 * Uses:  t0-t6, a6, a7, v0, v8, v12, v16-v19, v20, v24; s2/s3 are saved and
 *        restored on the slow path only.
 *
 * For every destination l and byte i this computes the XOR over all sources j
 * of gff_base[gflog_base[src[j][i]] + gflog_base[v[(l*srcs + j)*32 + 1]]],
 * treating the product as 0 when either factor is 0, and stores it to
 * dest[l][i].  One output byte is produced per inner iteration; the vector
 * lanes run across the sources.
 *
 * Fast path: all sources fit in one e8/mf2 vector.
 * Slow path: sources are processed in chunks of vl lanes and accumulated in
 * v20.  The chunk loop uses tail-undisturbed policy so that a shorter final
 * chunk cannot disturb accumulator lanes written by earlier, longer chunks
 * (tail-agnostic would allow them to be overwritten with all 1s).
 */
	.global ec_encode_data_rvv
	.type ec_encode_data_rvv, %function
ec_encode_data_rvv:
	blez	a2, 3f				// dests <= 0: nothing to do
	blez	a0, 3f				// len <= 0: nothing to do

	lla	t0, gff_base
	lla	t1, gflog_base
	addi	a3, a3, 1			// point at byte 1 of the first 32-byte table
	vsetvli	zero, a1, e8, mf2, ta, ma
	vmv.v.i	v20, 0				// v20 = XOR accumulator
	li	t3, 32				// byte stride between per-source tables
	mv	a6, a0				// a6 = saved len
	mv	a7, a5				// a7 = current position in dest[]

	csrr	t5, vlenb			// VLEN / 8
	srli	t5, t5, 1			// lanes available at e8/mf2
	blt	t5, a1, slow			// more sources than lanes: chunked path

2:						// ---- fast path: per destination ----
	li	t2, 0				// t2 = i (byte offset inside each source)
	vlse8.v	v24, (a3), t3			// v24[j] = v[(l*srcs + j)*32 + 1]
	vmsne.vi v12, v24, 0			// v12 = coefficient != 0
	vluxei8.v v24, (t1), v24		// v24[j] = gflog_base[coefficient]
	ld	a5, (a5)			// a5 = dest[l]

1:						// ---- fast path: per byte ----
	vsetvli	zero, zero, e8, mf2, ta, ma
	vle64.v	v16, (a4)			// v16[j] = src[j] (pointer)
	vluxei64.v v16, (t2), v16		// v16[j] = src[j][i]
	vmsne.vi v0, v16, 0			// v0 = source byte != 0
	vmand.mm v0, v0, v12			// active only when both factors are non-zero
	vluxei8.v v16, (t1), v16, v0.t		// gflog_base[src[j][i]]
	vwaddu.vv v8, v16, v24, v0.t		// 16-bit sum of the two logs
	vmv.v.i	v16, 0				// inactive lanes contribute 0
	vsetvli	zero, zero, e8, mf2, ta, mu	// keep those zeros in masked-off lanes
	vluxei16.v v16, (t0), v8, v0.t		// gff_base[log sum]
	vxor.vv	v20, v16, v20
	vmv.s.x	v8, zero
	vredxor.vs v8, v20, v8			// fold all lanes into one byte
	vmv.x.s	t5, v8
	addi	a0, a0, -1			// one byte fewer to go
	sb	t5, (a5)			// dest[l][i]
	addi	t2, t2, 1			// i++
	vmv.v.i	v20, 0				// reset accumulator
	addi	a5, a5, 1
	bnez	a0, 1b

	addi	a2, a2, -1			// next destination
	addi	a7, a7, 8
	mv	a0, a6				// restore len
	mv	a5, a7				// a5 = &dest[l + 1]
	slli	t5, a1, 5			// srcs * 32
	add	a3, a3, t5			// advance to the next row of tables
	bnez	a2, 2b

	ret

slow:
	addi	sp, sp, -16
	sd	s2, 0(sp)
	sd	s3, 8(sp)
	mv	s3, a4				// s3 = src (start of pointer array)
	mv	s2, a3				// s2 = v for the current destination
	mv	t4, a1				// t4 = srcs

2:						// ---- slow path: per destination ----
	li	t2, 0				// t2 = i
	ld	a5, (a5)			// a5 = dest[l]

1:						// ---- slow path: per chunk of sources ----
	vsetvli	t6, a1, e8, mf2, tu, ma		// tu: preserve v20 lanes beyond this chunk
	vle64.v	v16, (a4)			// v16[j] = src[j] (pointer)
	vluxei64.v v16, (t2), v16		// v16[j] = src[j][i]
	vlse8.v	v24, (a3), t3			// coefficients for this chunk
	vmsne.vi v12, v24, 0			// v12 = coefficient != 0
	vmsne.vi v0, v16, 0			// v0 = source byte != 0
	vmand.mm v0, v0, v12			// active only when both factors are non-zero
	vluxei8.v v16, (t1), v16, v0.t		// gflog_base[src[j][i]]
	vluxei8.v v24, (t1), v24		// gflog_base[coefficient]
	vwaddu.vv v8, v16, v24, v0.t		// 16-bit sum of the two logs
	vmv.v.i	v16, 0				// inactive lanes contribute 0
	vsetvli	zero, zero, e8, mf2, tu, mu	// keep zeros in masked-off lanes, keep tail
	vluxei16.v v16, (t0), v8, v0.t		// gff_base[log sum]
	vxor.vv	v20, v16, v20			// accumulate this chunk
	sub	a1, a1, t6			// sources left
	slli	t5, t6, 5
	add	a3, a3, t5			// v += 32 * vl
	slli	t5, t6, 3
	add	a4, a4, t5			// src += vl pointers
	bnez	a1, 1b

	vsetvli	zero, t4, e8, mf2, ta, ma
	vmv.s.x	v8, zero
	vredxor.vs v8, v20, v8			// fold all lanes into one byte
	vmv.x.s	t5, v8
	addi	a0, a0, -1			// one byte fewer to go
	sb	t5, (a5)			// dest[l][i]
	addi	t2, t2, 1			// i++
	vmv.v.i	v20, 0				// reset accumulator
	mv	a1, t4				// restore srcs
	mv	a3, s2				// restore v for this destination
	mv	a4, s3				// restore src
	addi	a5, a5, 1
	bnez	a0, 1b				// next byte (re-enters the chunk loop)

	addi	a2, a2, -1			// next destination
	addi	a7, a7, 8
	mv	a0, a6				// restore len
	mv	a5, a7				// a5 = &dest[l + 1]
	slli	t5, t4, 5			// srcs * 32
	add	a3, a3, t5			// advance to the next row of tables
	mv	s2, a3
	bnez	a2, 2b

	ld	s2, 0(sp)
	ld	s3, 8(sp)
	addi	sp, sp, 16

3:
	ret
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,120 +0,0 @@
|
|||||||
/**********************************************************************
|
|
||||||
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of ISCAS nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived
|
|
||||||
from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
**********************************************************************/
|
|
||||||
#if HAVE_RVV
|
|
||||||
#include "ec_table.S"
|
|
||||||
.option arch, +v
|
|
||||||
/*
 * void gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v,
 *                           unsigned char **src, unsigned char *dest)
 *
 * In:    a0 = len, a1 = vlen (number of sources), a2 = v, a3 = src, a4 = dest
 * Uses:  t0-t6, a6, a7, v0, v8, v12, v16-v19, v20, v24
 *
 * For every byte i this computes the XOR over all sources j of
 * gff_base[gflog_base[src[j][i]] + gflog_base[v[j*32 + 1]]], treating the
 * product as 0 when either factor is 0, and stores it to dest[i].  One output
 * byte is produced per iteration; the vector lanes run across the sources.
 *
 * Fast path: all sources fit in one e8/mf2 vector.
 * Slow path: sources are processed in chunks of vl lanes and accumulated in
 * v20.  The chunk loop uses tail-undisturbed policy so that a shorter final
 * chunk cannot disturb accumulator lanes written by earlier, longer chunks
 * (tail-agnostic would allow them to be overwritten with all 1s).
 */
	.global gf_vect_dot_prod_rvv
	.type gf_vect_dot_prod_rvv, %function
gf_vect_dot_prod_rvv:
	blez	a0, 3f				// len <= 0: nothing to do

	lla	t0, gff_base
	lla	t1, gflog_base
	addi	a2, a2, 1			// point at byte 1 of the first 32-byte table
	vsetvli	zero, a1, e8, mf2, ta, ma
	vmv.v.i	v20, 0				// v20 = XOR accumulator
	li	t2, 0				// t2 = i (byte offset inside each source)
	li	t3, 32				// byte stride between per-source tables
	csrr	t5, vlenb			// VLEN / 8
	srli	t5, t5, 1			// lanes available at e8/mf2
	blt	t5, a1, slow			// more sources than lanes: chunked path

	vlse8.v	v24, (a2), t3			// v24[j] = v[j*32 + 1]
	vmsne.vi v12, v24, 0			// v12 = coefficient != 0
	vluxei8.v v24, (t1), v24		// v24[j] = gflog_base[coefficient]

1:						// ---- fast path: per byte ----
	vsetvli	zero, zero, e8, mf2, ta, ma
	vle64.v	v16, (a3)			// v16[j] = src[j] (pointer)
	vluxei64.v v16, (t2), v16		// v16[j] = src[j][i]
	vmsne.vi v0, v16, 0			// v0 = source byte != 0
	vmand.mm v0, v0, v12			// active only when both factors are non-zero
	vluxei8.v v16, (t1), v16, v0.t		// gflog_base[src[j][i]]
	vwaddu.vv v8, v16, v24, v0.t		// 16-bit sum of the two logs
	vmv.v.i	v16, 0				// inactive lanes contribute 0
	vsetvli	zero, zero, e8, mf2, ta, mu	// keep those zeros in masked-off lanes
	vluxei16.v v16, (t0), v8, v0.t		// gff_base[log sum]
	vxor.vv	v20, v16, v20

	vmv.s.x	v8, zero
	vredxor.vs v8, v20, v8			// fold all lanes into one byte
	vmv.x.s	t5, v8
	addi	a0, a0, -1			// one byte fewer to go
	sb	t5, (a4)			// dest[i]
	addi	t2, t2, 1			// i++
	vmv.v.i	v20, 0				// reset accumulator
	addi	a4, a4, 1
	bnez	a0, 1b
	ret

slow:
	mv	a7, a3				// a7 = src (start of pointer array)
	mv	a6, a2				// a6 = v (start of tables)
	mv	t4, a1				// t4 = vlen

1:						// ---- slow path: per chunk of sources ----
	vsetvli	t6, a1, e8, mf2, tu, ma		// tu: preserve v20 lanes beyond this chunk
	vle64.v	v16, (a3)			// v16[j] = src[j] (pointer)
	vluxei64.v v16, (t2), v16		// v16[j] = src[j][i]
	vlse8.v	v24, (a2), t3			// coefficients for this chunk
	vmsne.vi v0, v16, 0			// v0 = source byte != 0
	vmsne.vi v12, v24, 0			// v12 = coefficient != 0
	vmand.mm v0, v0, v12			// active only when both factors are non-zero
	vluxei8.v v16, (t1), v16, v0.t		// gflog_base[src[j][i]]
	vluxei8.v v24, (t1), v24, v0.t		// gflog_base[coefficient]
	vwaddu.vv v8, v16, v24, v0.t		// 16-bit sum of the two logs
	vmv.v.i	v16, 0				// inactive lanes contribute 0
	vsetvli	zero, zero, e8, mf2, tu, mu	// keep zeros in masked-off lanes, keep tail
	vluxei16.v v16, (t0), v8, v0.t		// gff_base[log sum]
	vxor.vv	v20, v16, v20			// accumulate this chunk
	slli	t5, t6, 5
	add	a2, a2, t5			// v += 32 * vl
	slli	t5, t6, 3
	add	a3, a3, t5			// src += vl pointers
	sub	a1, a1, t6			// sources left
	bnez	a1, 1b

	vsetvli	zero, t4, e8, mf2, ta, mu
	vmv.s.x	v8, zero
	vredxor.vs v8, v20, v8			// fold all lanes into one byte
	vmv.x.s	t5, v8
	addi	a0, a0, -1			// one byte fewer to go
	mv	a3, a7				// restore src
	mv	a2, a6				// restore v
	mv	a1, t4				// restore vlen
	addi	t2, t2, 1			// i++
	sb	t5, (a4)			// dest[i]
	vmv.v.i	v20, 0				// reset accumulator
	addi	a4, a4, 1
	bnez	a0, 1b				// next byte (re-enters the chunk loop)

3:
	ret
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
/**********************************************************************
|
|
||||||
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of ISCAS nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived
|
|
||||||
from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
**********************************************************************/
|
|
||||||
#if HAVE_RVV
|
|
||||||
#include "ec_table.S"
|
|
||||||
.option arch, +v
|
|
||||||
/*
 * int gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src,
 *                     unsigned char *dest)
 *
 * In:    a0 = len, a1 = a (multiplier c is read from a[1]), a2 = src, a3 = dest
 * Out:   a0 = 0 after processing all bytes, -1 when len is not a multiple of 32
 * Uses:  t0-t2, t4, t6, v0-v7, v8-v15, v16-v19
 *
 * Writes dest[i] = gff_base[gflog_base[src[i]] + gflog_base[c]] for every
 * non-zero src[i], and 0 where src[i] == 0.  When c == 0 the whole
 * destination is filled with zeros.
 *
 * The main loop runs with mask-undisturbed policy: v16 is cleared and then
 * filled by a masked gather, and the lanes for zero source bytes must keep
 * that 0.  Mask-agnostic policy would allow them to be overwritten with 1s.
 */
	.global gf_vect_mul_rvv
	.type gf_vect_mul_rvv, %function
gf_vect_mul_rvv:
	li	t4, 32
	rem	t4, a0, t4
	bnez	t4, ret1			// len % 32 != 0: reject

	lla	t0, gff_base
	lla	t1, gflog_base
	lbu	t2, 1(a1)			// c = a[1]
	beqz	t2, 2f				// c == 0: result is all zeros
	add	t2, t1, t2			// &gflog_base[c]
	lbu	t2, (t2)			// t2 = gflog_base[c]

1:
	vsetvli	t6, a0, e8, m4, ta, mu		// mu: masked-off lanes keep their value
	vle8.v	v16, (a2)			// v16 = src bytes
	vmsne.vi v0, v16, 0			// v0 = source byte != 0
	vluxei8.v v16, (t1), v16, v0.t		// gflog_base[src[i]]
	vwaddu.vx v8, v16, t2, v0.t		// 16-bit sum of the two logs
	vmv.v.i	v16, 0				// zero source bytes produce 0
	vluxei16.v v16, (t0), v8, v0.t		// gff_base[log sum]
	vse8.v	v16, (a3)
	add	a2, a2, t6			// src += vl
	add	a3, a3, t6			// dest += vl
	sub	a0, a0, t6			// len -= vl
	bnez	a0, 1b
	ret					// a0 == 0 here

2:
	vsetvli	t6, a0, e8, m8, ta, ma
	vmv.v.i	v0, 0				// v0-v7 = zeros to store

3:
	vsetvli	t6, a0, e8, m8, ta, ma
	vse8.v	v0, (a3)
	add	a3, a3, t6			// dest += vl
	sub	a0, a0, t6			// len -= vl
	bnez	a0, 3b
	ret					// a0 == 0 here

ret1:
	li	a0, -1
	ret
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,3 +1,31 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||||
|
|
||||||
@@ -29,16 +57,9 @@
|
|||||||
|
|
||||||
#include "riscv64_multibinary.h"
|
#include "riscv64_multibinary.h"
|
||||||
|
|
||||||
#if HAVE_RVV
|
mbin_interface ec_encode_data
|
||||||
mbin_interface gf_vect_mul
|
mbin_interface gf_vect_mul
|
||||||
mbin_interface gf_vect_dot_prod
|
mbin_interface gf_vect_dot_prod
|
||||||
mbin_interface ec_encode_data
|
mbin_interface gf_vect_mad
|
||||||
#else
|
mbin_interface ec_encode_data_update
|
||||||
mbin_interface_base gf_vect_mul gf_vect_mul_base
|
mbin_interface ec_init_tables
|
||||||
mbin_interface_base gf_vect_dot_prod gf_vect_dot_prod_base
|
|
||||||
mbin_interface_base ec_encode_data ec_encode_data_base
|
|
||||||
#endif
|
|
||||||
|
|
||||||
mbin_interface_base ec_init_tables ec_init_tables_base
|
|
||||||
mbin_interface_base ec_encode_data_update ec_encode_data_update_base
|
|
||||||
mbin_interface_base gf_vect_mad gf_vect_mad_base
|
|
||||||
|
|||||||
@@ -1,78 +0,0 @@
|
|||||||
/**********************************************************************
|
|
||||||
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of ISCAS nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived
|
|
||||||
from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
**********************************************************************/
|
|
||||||
#include "riscv64_multibinary.h"
|
|
||||||
|
|
||||||
/* gf_vect_mul: RVV and portable implementations. */
extern int gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern int gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);

/* gf_vect_dot_prod: RVV and portable implementations. */
extern void gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v, unsigned char **src,
                                 unsigned char *dest);
extern void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
                                  unsigned char *dest);

/* ec_encode_data: RVV and portable implementations. */
extern void ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v,
                               unsigned char **src, unsigned char **dest);
extern void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
                                unsigned char **src, unsigned char **dest);
|
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
|
||||||
{
|
|
||||||
#if HAVE_RVV
|
|
||||||
const unsigned long hwcap = getauxval(AT_HWCAP);
|
|
||||||
if (hwcap & HWCAP_RV('V'))
|
|
||||||
return gf_vect_mul_rvv;
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
return gf_vect_mul_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
|
||||||
{
|
|
||||||
#if HAVE_RVV
|
|
||||||
const unsigned long hwcap = getauxval(AT_HWCAP);
|
|
||||||
if (hwcap & HWCAP_RV('V'))
|
|
||||||
return gf_vect_dot_prod_rvv;
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
return gf_vect_dot_prod_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
|
||||||
{
|
|
||||||
#if HAVE_RVV
|
|
||||||
const unsigned long hwcap = getauxval(AT_HWCAP);
|
|
||||||
if (hwcap & HWCAP_RV('V'))
|
|
||||||
return ec_encode_data_rvv;
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
return ec_encode_data_base;
|
|
||||||
}
|
|
||||||
147
erasure_code/riscv64/ec_riscv64_dispatcher.c
Normal file
147
erasure_code/riscv64/ec_riscv64_dispatcher.c
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
/**************************************************************
|
||||||
|
Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of sanechips Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of ISCAS nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "riscv64_multibinary.h"
|
||||||
|
|
||||||
|
/*
 * Prototypes of the kernels the dispatchers below choose between.
 *
 * These must match the real definitions: ec_encode_data_rvv() takes no
 * vec_i argument and receives an array of source pointers, and
 * ec_encode_data_update_base() takes (len, k, rows, vec_i, tables, data,
 * coding) exactly as ec_encode_data_update_rvv() calls it.
 */
extern void
gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                     unsigned char *dest);
extern void
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
                      unsigned char *dest);
extern void
gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                unsigned char *dest);
extern void
gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
                 unsigned char *dest);
extern void
ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding);
extern void
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
                    unsigned char **dest);
extern int
gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern int
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern void
ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding);
extern void
ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
                           unsigned char *data, unsigned char **dest);
extern void
ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls);
|
||||||
|
|
||||||
|
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
||||||
|
{
|
||||||
|
#if HAVE_RVV
|
||||||
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
|
if (auxval & HWCAP_RV('V'))
|
||||||
|
return gf_vect_dot_prod_rvv;
|
||||||
|
#endif
|
||||||
|
return gf_vect_dot_prod_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
|
||||||
|
{
|
||||||
|
#if HAVE_RVV
|
||||||
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
|
if (auxval & HWCAP_RV('V'))
|
||||||
|
return gf_vect_mad_rvv;
|
||||||
|
#endif
|
||||||
|
return gf_vect_mad_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
||||||
|
{
|
||||||
|
#if HAVE_RVV
|
||||||
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
|
if (auxval & HWCAP_RV('V'))
|
||||||
|
return ec_encode_data_rvv;
|
||||||
|
#endif
|
||||||
|
return ec_encode_data_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
|
||||||
|
{
|
||||||
|
#if HAVE_RVV
|
||||||
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
|
if (auxval & HWCAP_RV('V'))
|
||||||
|
return ec_encode_data_update_rvv;
|
||||||
|
#endif
|
||||||
|
return ec_encode_data_update_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
||||||
|
{
|
||||||
|
#if HAVE_RVV
|
||||||
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
|
if (auxval & HWCAP_RV('V'))
|
||||||
|
return gf_vect_mul_rvv;
|
||||||
|
#endif
|
||||||
|
return gf_vect_mul_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ec_init_tables has a single candidate here: always the base version. */
DEFINE_INTERFACE_DISPATCHER(ec_init_tables)
{
        return ec_init_tables_base;
}
|
||||||
188
erasure_code/riscv64/ec_riscv64_highlevel_func.c
Normal file
188
erasure_code/riscv64/ec_riscv64_highlevel_func.c
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
/**************************************************************
|
||||||
|
Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of sanechips Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#if HAVE_RVV
|
||||||
|
#include "erasure_code.h"
|
||||||
|
|
||||||
|
/*external function*/
|
||||||
|
|
||||||
|
/* RVV */
|
||||||
|
extern void
|
||||||
|
gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char *dest);
|
||||||
|
extern void
|
||||||
|
gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_8vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char *dest);
|
||||||
|
extern void
|
||||||
|
gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
|
/*
 * Invoke the RVV dot-product kernel that produces exactly `nrows` parity
 * rows (1..7) in one pass. Any other count is a no-op.
 */
static void
ec_dot_prod_rows_rvv(int nrows, int len, int k, unsigned char *g_tbls, unsigned char **data,
                     unsigned char **coding)
{
        switch (nrows) {
        case 1:
                gf_vect_dot_prod_rvv(len, k, g_tbls, data, *coding);
                break;
        case 2:
                gf_2vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                break;
        case 6:
                gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                break;
        case 7:
                gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                break;
        default:
                break;
        }
}

/*
 * Encode `rows` parity blocks from `k` source blocks of `len` bytes using
 * the RVV multi-destination dot-product kernels.
 *
 * Blocks shorter than 16 bytes are handed to ec_encode_data_base().
 * Otherwise rows are consumed six at a time while more than 11 remain;
 * a remainder of 8..11 is split as (rows - 4) + 4 and a remainder of
 * 1..7 is done in a single kernel call. Each parity row owns k * 32 bytes
 * of g_tbls.
 */
void
ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        for (; rows > 11; rows -= 6) {
                gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        if (rows >= 8) {
                /* 11 -> 7 + 4, 10 -> 6 + 4, 9 -> 5 + 4, 8 -> 4 + 4 */
                const int head = rows - 4;

                ec_dot_prod_rows_rvv(head, len, k, g_tbls, data, coding);
                g_tbls += head * k * 32;
                coding += head;
                rows = 4;
        }

        ec_dot_prod_rows_rvv(rows, len, k, g_tbls, data, coding);
}
|
||||||
|
|
||||||
|
/*
 * Fold source block number `vec_i` (of `k`) into `rows` parity blocks of
 * `len` bytes using the RVV multiply-add kernels.
 *
 * Blocks shorter than 16 bytes are handed to ec_encode_data_update_base().
 * Otherwise rows are updated six at a time while more than six remain and
 * the last 1..6 rows are handled by the kernel of matching width. Each
 * parity row owns k * 32 bytes of g_tbls.
 */
void
ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        for (; rows > 6; rows -= 6) {
                gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        /* 1..6 rows left (or none when rows was not positive) */
        if (rows == 6)
                gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 5)
                gf_5vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_rvv(len, k, vec_i, g_tbls, data, *coding);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
/**********************************************************************
|
|
||||||
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions
|
|
||||||
are met:
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of ISCAS nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived
|
|
||||||
from this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
**********************************************************************/
|
|
||||||
// Reference: https://git.ffmpeg.org/gitweb/ffmpeg.git/commit/746f1ff36ac0d232687820fbde4e4efc79093af7
|
|
||||||
/*
 * const sym, align=3, relocate=0
 *
 * Open a data object named \sym, aligned with ".align \align", placed in
 * .rodata (relocate == 0) or .data.rel.ro (relocate != 0). It also defines
 * a one-shot "endconst" macro that records the object's size, restores the
 * previous section and then removes itself so the next "const" can define
 * it again.
 */
.macro const sym, align=3, relocate=0
        .ifeq \relocate
        .pushsection .rodata
        .else
        .pushsection .data.rel.ro
        .endif
        .align \align
\sym:

        .macro endconst
        .size \sym, . - \sym
        .popsection
        .purgem endconst
        .endm
.endm
|
|
||||||
|
|
||||||
const gff_base
|
|
||||||
.rept 2
|
|
||||||
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13
|
|
||||||
.byte 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30
|
|
||||||
.byte 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee
|
|
||||||
.byte 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2
|
|
||||||
.byte 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89
|
|
||||||
.byte 0x0f, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1
|
|
||||||
.byte 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d
|
|
||||||
.byte 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93
|
|
||||||
.byte 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda
|
|
||||||
.byte 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4
|
|
||||||
.byte 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6
|
|
||||||
.byte 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b
|
|
||||||
.byte 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32
|
|
||||||
.byte 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2
|
|
||||||
.byte 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, 0x12
|
|
||||||
.byte 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16
|
|
||||||
.byte 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e
|
|
||||||
.endr
|
|
||||||
.byte 0x01
|
|
||||||
endconst
|
|
||||||
|
|
||||||
const gflog_base
|
|
||||||
.byte 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7
|
|
||||||
.byte 0x4b, 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08
|
|
||||||
.byte 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0
|
|
||||||
.byte 0x12, 0x82, 0x45, 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78
|
|
||||||
.byte 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25
|
|
||||||
.byte 0xb3, 0x10, 0x91, 0x22, 0x88, 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2
|
|
||||||
.byte 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b
|
|
||||||
.byte 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b
|
|
||||||
.byte 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63
|
|
||||||
.byte 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8
|
|
||||||
.byte 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, 0x37, 0x3f, 0xd1, 0x5b, 0x95
|
|
||||||
.byte 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab
|
|
||||||
.byte 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, 0x1f, 0x2d, 0x43
|
|
||||||
.byte 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1
|
|
||||||
.byte 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, 0xcb
|
|
||||||
.byte 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7
|
|
||||||
.byte 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58
|
|
||||||
.byte 0xaf
|
|
||||||
endconst
|
|
||||||
161
erasure_code/riscv64/gf_2vect_dot_prod_rvv.S
Normal file
161
erasure_code/riscv64/gf_2vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
.global gf_2vect_dot_prod_rvv
|
||||||
|
.type gf_2vect_dot_prod_rvv, @function
|
||||||
|
|
||||||
|
/* void gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* arguments */
|
||||||
|
#define x_len a0 /* vector length */
|
||||||
|
#define x_vec a1 /* number of source vectors (ie. data blocks) */
|
||||||
|
#define x_tbl a2
|
||||||
|
#define x_src a3
|
||||||
|
#define x_dest a4
|
||||||
|
|
||||||
|
/* local variables */
|
||||||
|
#define x_vec_i t0
|
||||||
|
#define x_ptr t1
|
||||||
|
#define x_pos t2
|
||||||
|
|
||||||
|
#define x_tbl1 t3
|
||||||
|
#define x_tbl2 t4
|
||||||
|
#define x_dest1 t5
|
||||||
|
#define x_dest2 a7
|
||||||
|
|
||||||
|
/* vectors */
|
||||||
|
#define v_src v1
|
||||||
|
#define v_src_lo v2
|
||||||
|
#define v_src_hi v3
|
||||||
|
|
||||||
|
#define v_dest1 v4
|
||||||
|
|
||||||
|
#define v_gft1_lo v5
|
||||||
|
#define v_gft1_hi v6
|
||||||
|
|
||||||
|
#define v_gft2_lo v7
|
||||||
|
#define v_gft2_hi v8
|
||||||
|
#define v_dest2 v9
|
||||||
|
|
||||||
|
/*
 * gf_2vect_dot_prod_rvv - GF(2^8) dot product of x_vec source buffers
 * into two destination buffers.
 *
 * In:    a0 = len, a1 = vlen (number of sources), a2 = gftbls,
 *        a3 = src (array of source pointers),
 *        a4 = dest (array of two destination pointers)
 * Out:   a0 = 0 on success, 1 when len < 16 (nothing is written)
 * Uses:  t0-t6, a5-a7, v1-v9, v26, v27; no stack, no callee-saved regs
 *
 * Table layout as consumed here: 32 bytes per (dest, source) pair, a
 * 16-byte low-nibble table followed by a 16-byte high-nibble table. The
 * tables of dest 2 start x_vec * 32 bytes after those of dest 1.
 *
 * The length is strip-mined: every pass asks vsetvli for the bytes that
 * are still left, so a len that is not a multiple of VLMAX never reads
 * past the sources or writes past the destinations. The nibble tables are
 * always loaded with vl = 16 so that no bytes beyond a 16-byte table are
 * read on implementations with VLEN > 128.
 */
gf_2vect_dot_prod_rvv:
        /* less than 16 bytes, return_fail */
        li      t6, 16
        blt     x_len, t6, .return_fail

        li      x_pos, 0
        ld      x_dest1, 0(x_dest)
        ld      x_dest2, 8(x_dest)

        /* Loop 1: x_len, one strip of a5 bytes per pass */
.Llooprvv_vl:
        bge     x_pos, x_len, .return_pass

        /* a5 = number of bytes handled in this strip (<= bytes left) */
        sub     t6, x_len, x_pos
        vsetvli a5, t6, e8, m1, ta, ma

        li      x_vec_i, 0              /* clear x_vec_i */
        vmv.v.i v_dest1, 0              /* clear v_dest1 */
        vmv.v.i v_dest2, 0              /* clear v_dest2 */

        /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */
        mv      x_tbl1, x_tbl           /* reset x_tbl1 */
        slli    t6, x_vec, 5
        add     x_tbl2, x_tbl1, t6      /* reset x_tbl2 */

        /* Loop 2: x_vec, number of source vectors (ie. data blocks) */
.Llooprvv_vl_vects:
        /*
         * gf_tbl addr: (x_tbl + dest_idx * x_vec * 32) + src_vec_idx * 32
         * Load exactly 16 table bytes per register. vrgather may index
         * any element below VLMAX regardless of vl, and the nibble
         * indices are always < 16, so 16 valid elements are sufficient.
         */
        vsetivli zero, 16, e8, m1, ta, ma
        vle8.v  v_gft1_lo, (x_tbl1)
        addi    x_tbl1, x_tbl1, 16
        vle8.v  v_gft1_hi, (x_tbl1)
        addi    x_tbl1, x_tbl1, 16

        vle8.v  v_gft2_lo, (x_tbl2)
        addi    x_tbl2, x_tbl2, 16
        vle8.v  v_gft2_hi, (x_tbl2)
        addi    x_tbl2, x_tbl2, 16

        /* back to the strip length (a5 <= VLMAX, so vl becomes a5) */
        vsetvli zero, a5, e8, m1, ta, ma

        /* load src data from: src[x_vec_i] + x_pos */
        slli    a6, x_vec_i, 3
        add     a6, x_src, a6
        ld      x_ptr, 0(a6)
        add     x_ptr, x_ptr, x_pos
        vle8.v  v_src, (x_ptr)

        /* split 4-bit lo; 4-bit hi */
        vand.vi v_src_lo, v_src, 0x0F
        vsrl.vi v_src_hi, v_src, 4

        /* dest 1 */
        /* table indexing, ie. gf(2^8) multiplication */
        vrgather.vv v26, v_gft1_lo, v_src_lo
        vrgather.vv v27, v_gft1_hi, v_src_hi
        /* exclusive or, ie. gf(2^8) add */
        vxor.vv v_dest1, v_dest1, v26
        vxor.vv v_dest1, v_dest1, v27

        /* dest 2 */
        vrgather.vv v26, v_gft2_lo, v_src_lo
        vrgather.vv v27, v_gft2_hi, v_src_hi
        vxor.vv v_dest2, v_dest2, v26
        vxor.vv v_dest2, v_dest2, v27

        /* calc for next */
        addi    x_vec_i, x_vec_i, 1     /* move x_vec_i to next */
        blt     x_vec_i, x_vec, .Llooprvv_vl_vects
        /* end of Loop 2 */

        /* store dest data (vl is still a5 here) */
        vse8.v  v_dest1, (x_dest1)
        vse8.v  v_dest2, (x_dest2)
        add     x_dest1, x_dest1, a5
        add     x_dest2, x_dest2, a5

        /* advance by the bytes actually processed */
        add     x_pos, x_pos, a5
        j       .Llooprvv_vl
        /* end of Loop 1 */

.return_pass:
        li      a0, 0
        ret

.return_fail:
        li      a0, 1
        ret

        .size   gf_2vect_dot_prod_rvv, . - gf_2vect_dot_prod_rvv
|
||||||
|
|
||||||
|
#endif
|
||||||
148
erasure_code/riscv64/gf_2vect_mad_rvv.S
Normal file
148
erasure_code/riscv64/gf_2vect_mad_rvv.S
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
.global gf_2vect_mad_rvv
|
||||||
|
.type gf_2vect_mad_rvv, @function
|
||||||
|
|
||||||
|
/* gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
|
unsigned char *src, unsigned char **dest);
|
||||||
|
*/
|
||||||
|
/* arguments */
|
||||||
|
#define x_len a0
|
||||||
|
#define x_vec a1
|
||||||
|
#define x_vec_i a2
|
||||||
|
#define x_tbl a3
|
||||||
|
#define x_src a4
|
||||||
|
#define x_dest a5
|
||||||
|
|
||||||
|
/* returns */
|
||||||
|
#define w_ret a0
|
||||||
|
|
||||||
|
/* local variables */
|
||||||
|
#define x_pos t0
|
||||||
|
#define x_dest2 t1
|
||||||
|
#define x_dest1 t2
|
||||||
|
|
||||||
|
/* vectors */
|
||||||
|
#define v_src v1
|
||||||
|
#define v_src_lo v2
|
||||||
|
#define v_src_hi v1
|
||||||
|
#define v_dest1 v3
|
||||||
|
#define v_tmp_lo v4
|
||||||
|
#define v_tmp_hi v5
|
||||||
|
#define v_gft1_lo v6
|
||||||
|
#define v_gft1_hi v7
|
||||||
|
#define v_gft2_lo v17
|
||||||
|
#define v_gft2_hi v18
|
||||||
|
#define v_dest2 v27
|
||||||
|
|
||||||
|
/*
 * gf_2vect_mad_rvv - GF(2^8) multiply one source buffer by two
 * coefficients and XOR the products into two destination buffers.
 *
 * In:    a0 = len, a1 = vec (number of sources the tables were built for),
 *        a2 = vec_i (index of this source), a3 = gftbls, a4 = src,
 *        a5 = dest (array of two destination pointers)
 * Out:   a0 = 0 on success, 1 when len < 16 (nothing is written)
 * Uses:  t0-t4, a3 (advanced to the selected tables), v1-v7, v17, v18,
 *        v27; no stack, no callee-saved registers
 *
 * Table layout as consumed here: the 32-byte entry for this source is at
 * gftbls + vec_i * 32 (16-byte low-nibble table, then 16-byte high-nibble
 * table); the entry for the second destination is vec * 32 bytes further.
 *
 * The tables are loaded with vl = 16 so that nothing beyond a 16-byte
 * table is read when VLEN > 128, and the data loop asks vsetvli for the
 * bytes that are still left so a len that is not a multiple of VLMAX
 * never reads past src or reads/writes past the destinations.
 */
gf_2vect_mad_rvv:
        /* less than 16 bytes, return_fail */
        li      t3, 16
        blt     x_len, t3, .return_fail

        /*
         * Exactly 16 table bytes per register. vrgather may index any
         * element below VLMAX regardless of vl, and the nibble indices
         * are always < 16, so the later strips can use any vl.
         */
        vsetivli zero, 16, e8, m1, ta, ma

        /* load table 1 */
        slli    t3, x_vec_i, 5
        add     x_tbl, x_tbl, t3
        vle8.v  v_gft1_lo, (x_tbl)
        addi    t3, x_tbl, 16
        vle8.v  v_gft1_hi, (t3)

        /* load table 2 */
        slli    t3, x_vec, 5
        add     x_tbl, x_tbl, t3
        vle8.v  v_gft2_lo, (x_tbl)
        addi    t3, x_tbl, 16
        vle8.v  v_gft2_hi, (t3)

        /* load dest pointers */
        ld      x_dest1, 0(x_dest)
        ld      x_dest2, 8(x_dest)

        li      x_pos, 0

.Llooprvv_vl:
        bge     x_pos, x_len, .return_pass

        /* t4 = number of bytes handled in this strip (<= bytes left) */
        sub     t3, x_len, x_pos
        vsetvli t4, t3, e8, m1, ta, ma

        /* load src data */
        add     t3, x_src, x_pos
        vle8.v  v_src, (t3)

        /* split 4-bit lo; 4-bit hi (v_src_hi reuses the v_src register) */
        vand.vi v_src_lo, v_src, 0x0F
        vsrl.vi v_src_hi, v_src, 4

        /* load dest data */
        add     t3, x_dest1, x_pos
        vle8.v  v_dest1, (t3)
        add     t3, x_dest2, x_pos
        vle8.v  v_dest2, (t3)

        /* dest1 */
        /* table indexing, ie. gf(2^8) multiplication */
        vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
        vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
        /* exclusive or, ie. gf(2^8) add */
        vxor.vv v_dest1, v_tmp_lo, v_dest1
        vxor.vv v_dest1, v_tmp_hi, v_dest1

        /* dest2 */
        vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
        vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
        vxor.vv v_dest2, v_tmp_lo, v_dest2
        vxor.vv v_dest2, v_tmp_hi, v_dest2

        /* store dest data */
        add     t3, x_dest1, x_pos
        vse8.v  v_dest1, (t3)
        add     t3, x_dest2, x_pos
        vse8.v  v_dest2, (t3)

        /* advance by the bytes actually processed */
        add     x_pos, x_pos, t4
        j       .Llooprvv_vl

.return_pass:
        li      w_ret, 0
        ret

.return_fail:
        li      w_ret, 1
        ret

        .size   gf_2vect_mad_rvv, . - gf_2vect_mad_rvv
|
||||||
|
|
||||||
|
#endif
|
||||||
188
erasure_code/riscv64/gf_3vect_dot_prod_rvv.S
Normal file
188
erasure_code/riscv64/gf_3vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_3vect_dot_prod_rvv
.type gf_3vect_dot_prod_rvv, @function

/*
 * int gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
 *                           unsigned char **src, unsigned char **dest);
 *
 * GF(2^8) dot product of `vlen` source buffers into three destination
 * buffers, `len` bytes each.
 *
 * gftbls layout as consumed here: 32 bytes per (dest, src) pair, a 16-entry
 * low-nibble table followed by a 16-entry high-nibble table. The rows for
 * consecutive destinations are vlen * 32 bytes apart.
 *
 * Returns 0 on success, 1 (nothing written) when len < 16.
 *
 * Each strip is processed with vl = min(bytes remaining, VLMAX), so no byte
 * beyond `len` is read from a source or written to a destination, and every
 * table is loaded with vl = 16 so gftbls is never over-read when VLEN > 128.
 * Assumes VLEN >= 128 so that a 16-entry table fits in one vector register.
 *
 * Clobbers: t0-t6, a5-a7, v1-v6, v8-v13, v26, v27, vl/vtype.
 * s0/s1 are saved and restored on the stack.
 */

/* arguments */
#define x_len       a0  /* vector length */
#define x_vec       a1  /* number of source vectors (ie. data blocks) */
#define x_tbl       a2  /* gftbls */
#define x_src       a3  /* src */
#define x_dest      a4  /* dest */

/* local variables */
#define x_tmp       t0  /* scratch: remaining bytes / address arithmetic */
#define x_vec_i     t1
#define x_ptr       t2
#define x_pos       t3
#define x_tbl1      t4
#define x_tbl2      t5
#define x_tbl3      t6
#define x_dest1     s0
#define x_dest2     s1
#define x_dest3     a5
#define t_offset    a6  /* vlen * 32: distance between table rows */
#define x_vl        a7  /* bytes handled by the current strip */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3

#define v_dest1     v4
#define v_dest2     v5
#define v_dest3     v6

#define v_gft1_lo   v8
#define v_gft1_hi   v9
#define v_gft2_lo   v10
#define v_gft2_hi   v11
#define v_gft3_lo   v12
#define v_gft3_hi   v13

#define v_tmp_lo    v26
#define v_tmp_hi    v27

gf_3vect_dot_prod_rvv:
    /* less than 16 bytes, return_fail */
    li          x_tmp, 16
    blt         x_len, x_tmp, .Lreturn_fail

    /* save callee-saved registers */
    addi        sp, sp, -16
    sd          s0, 0(sp)
    sd          s1, 8(sp)

    li          x_pos, 0
    slli        t_offset, x_vec, 5
    ld          x_dest1, 0(x_dest)
    ld          x_dest2, 8(x_dest)
    ld          x_dest3, 16(x_dest)

/* Loop 1: x_len, vector length */
.Lloop_rvv_vl:
    /* stop once every byte is done; otherwise size this strip */
    sub         x_tmp, x_len, x_pos
    blez        x_tmp, .Lreturn_pass
    vsetvli     x_vl, x_tmp, e8, m1, ta, ma

    /* Clear destination vectors */
    vmv.v.i     v_dest1, 0
    vmv.v.i     v_dest2, 0
    vmv.v.i     v_dest3, 0

    /* Reset table pointers */
    mv          x_tbl1, x_tbl
    add         x_tbl2, x_tbl1, t_offset
    add         x_tbl3, x_tbl2, t_offset

    /* Loop 2: x_vec, number of source vectors (ie. data blocks) */
    li          x_vec_i, 0
.Lloop_rvv_vl_vects:
    /* Load gf_table's: exactly 16 bytes each, independent of VLEN */
    vsetivli    zero, 16, e8, m1, ta, ma
    vle8.v      v_gft1_lo, (x_tbl1)
    addi        x_tbl1, x_tbl1, 16
    vle8.v      v_gft1_hi, (x_tbl1)
    addi        x_tbl1, x_tbl1, 16
    vle8.v      v_gft2_lo, (x_tbl2)
    addi        x_tbl2, x_tbl2, 16
    vle8.v      v_gft2_hi, (x_tbl2)
    addi        x_tbl2, x_tbl2, 16
    vle8.v      v_gft3_lo, (x_tbl3)
    addi        x_tbl3, x_tbl3, 16
    vle8.v      v_gft3_hi, (x_tbl3)
    addi        x_tbl3, x_tbl3, 16

    /* back to the strip length for the data path */
    vsetvli     zero, x_vl, e8, m1, ta, ma

    /* Load source data: src[x_vec_i] + x_pos */
    slli        x_tmp, x_vec_i, 3
    add         x_tmp, x_src, x_tmp
    ld          x_ptr, 0(x_tmp)
    add         x_ptr, x_ptr, x_pos
    vle8.v      v_src, (x_ptr)

    /* Split 4-bit lo; 4-bit hi */
    vand.vi     v_src_lo, v_src, 0x0F
    vsrl.vi     v_src_hi, v_src, 4

    /* dest 1: table lookup (GF multiply), then xor (GF add) */
    vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
    vxor.vv     v_dest1, v_dest1, v_tmp_lo
    vxor.vv     v_dest1, v_dest1, v_tmp_hi

    /* dest 2 */
    vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
    vxor.vv     v_dest2, v_dest2, v_tmp_lo
    vxor.vv     v_dest2, v_dest2, v_tmp_hi

    /* dest 3 */
    vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
    vxor.vv     v_dest3, v_dest3, v_tmp_lo
    vxor.vv     v_dest3, v_dest3, v_tmp_hi

    /* Move to next source vector */
    addi        x_vec_i, x_vec_i, 1

    /* Check if we have processed all vectors */
    blt         x_vec_i, x_vec, .Lloop_rvv_vl_vects

    /* Store destination data (vl is the strip length here) */
    vse8.v      v_dest1, (x_dest1)
    vse8.v      v_dest2, (x_dest2)
    vse8.v      v_dest3, (x_dest3)
    add         x_dest1, x_dest1, x_vl
    add         x_dest2, x_dest2, x_vl
    add         x_dest3, x_dest3, x_vl

    /* Increment position */
    add         x_pos, x_pos, x_vl
    j           .Lloop_rvv_vl

.Lreturn_pass:
    /* restore callee-saved registers */
    ld          s0, 0(sp)
    ld          s1, 8(sp)
    addi        sp, sp, 16

    li          a0, 0
    ret

.Lreturn_fail:
    li          a0, 1
    ret

.size gf_3vect_dot_prod_rvv, .-gf_3vect_dot_prod_rvv

#endif
|
||||||
170
erasure_code/riscv64/gf_3vect_mad_rvv.S
Normal file
170
erasure_code/riscv64/gf_3vect_mad_rvv.S
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_3vect_mad_rvv
.type gf_3vect_mad_rvv, @function

/*
 * int gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
 *                      unsigned char *src, unsigned char **dest);
 *
 * GF(2^8) multiply-accumulate: for each of three destinations,
 * dest[k][i] ^= gf_mul(coefficient k, src[i]) for i in [0, len).
 *
 * Table addressing as done here: the 32-byte table (16 low-nibble entries
 * followed by 16 high-nibble entries) for destination 0 is at
 * gftbls + vec_i * 32; each further destination is vec * 32 bytes on.
 *
 * Returns 0 on success, 1 (nothing written) when len < 16.
 *
 * Tables are loaded once with vl = 16 so gftbls is never over-read when
 * VLEN > 128. Each strip then runs with vl = min(bytes remaining, VLMAX),
 * so nothing beyond `len` is read or written.
 * Assumes VLEN >= 128 so that a 16-entry table fits in one vector register.
 *
 * Clobbers: t0-t6, a3, a6, v1-v12, v19, v20, vl/vtype. Leaf, no stack use.
 */

/* arguments */
#define x_len       a0
#define x_vec       a1
#define x_vec_i     a2
#define x_tbl       a3
#define x_src       a4
#define x_dest      a5

/* returns */
#define w_ret       a0

/* local variables */
#define x_pos       t0
#define x_dest1     t1
#define x_dest2     t2
#define x_dest3     t3
#define x_tmp       t4  /* scratch: remaining bytes / table address */
#define x_vl        t5  /* bytes handled by the current strip */
#define x_addr      t6  /* scratch data address */
#define x_stride    a6  /* vec * 32: distance between table rows */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3
#define v_dest1     v4
#define v_tmp_lo    v5
#define v_tmp_hi    v6
#define v_gft1_lo   v7
#define v_gft1_hi   v8
#define v_gft2_lo   v9
#define v_gft2_hi   v10
#define v_gft3_lo   v11
#define v_gft3_hi   v12
#define v_dest2     v19
#define v_dest3     v20

gf_3vect_mad_rvv:
    /* less than 16 bytes, return_fail */
    li          x_tmp, 16
    blt         x_len, x_tmp, .Lreturn_fail

    /* tables are 16 bytes each, independent of VLEN */
    vsetivli    zero, 16, e8, m1, ta, ma
    slli        x_stride, x_vec, 5

    /* Load table 1 */
    slli        x_tmp, x_vec_i, 5
    add         x_tbl, x_tbl, x_tmp
    vle8.v      v_gft1_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft1_hi, (x_tmp)

    /* Load table 2 */
    add         x_tbl, x_tbl, x_stride
    vle8.v      v_gft2_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft2_hi, (x_tmp)

    /* Load table 3 */
    add         x_tbl, x_tbl, x_stride
    vle8.v      v_gft3_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft3_hi, (x_tmp)

    /* Load destination pointers */
    ld          x_dest1, 0(x_dest)
    ld          x_dest2, 8(x_dest)
    ld          x_dest3, 16(x_dest)

    li          x_pos, 0

.Llooprvv_vl:
    /* stop once every byte is done; otherwise size this strip */
    sub         x_tmp, x_len, x_pos
    blez        x_tmp, .Lreturn_pass
    vsetvli     x_vl, x_tmp, e8, m1, ta, ma

    /* Load source data */
    add         x_addr, x_src, x_pos
    vle8.v      v_src, (x_addr)

    /* Split 4-bit lo; 4-bit hi */
    vand.vi     v_src_lo, v_src, 0x0F
    vsrl.vi     v_src_hi, v_src, 4

    /* load dest data */
    add         x_addr, x_dest1, x_pos
    vle8.v      v_dest1, (x_addr)
    add         x_addr, x_dest2, x_pos
    vle8.v      v_dest2, (x_addr)
    add         x_addr, x_dest3, x_pos
    vle8.v      v_dest3, (x_addr)

    /* dest1: table lookup (GF multiply), then xor (GF add) */
    vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
    vxor.vv     v_dest1, v_tmp_lo, v_dest1
    vxor.vv     v_dest1, v_tmp_hi, v_dest1

    /* dest2 */
    vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
    vxor.vv     v_dest2, v_tmp_lo, v_dest2
    vxor.vv     v_dest2, v_tmp_hi, v_dest2

    /* dest3 */
    vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
    vxor.vv     v_dest3, v_tmp_lo, v_dest3
    vxor.vv     v_dest3, v_tmp_hi, v_dest3

    /* Store destination data */
    add         x_addr, x_dest1, x_pos
    vse8.v      v_dest1, (x_addr)
    add         x_addr, x_dest2, x_pos
    vse8.v      v_dest2, (x_addr)
    add         x_addr, x_dest3, x_pos
    vse8.v      v_dest3, (x_addr)

    /* Increment position */
    add         x_pos, x_pos, x_vl
    j           .Llooprvv_vl

.Lreturn_pass:
    li          w_ret, 0
    ret

.Lreturn_fail:
    li          w_ret, 1
    ret

.size gf_3vect_mad_rvv, .-gf_3vect_mad_rvv

#endif
|
||||||
214
erasure_code/riscv64/gf_4vect_dot_prod_rvv.S
Normal file
214
erasure_code/riscv64/gf_4vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_4vect_dot_prod_rvv
.type gf_4vect_dot_prod_rvv, @function

/*
 * int gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
 *                           unsigned char **src, unsigned char **dest);
 *
 * GF(2^8) dot product of `vlen` source buffers into four destination
 * buffers, `len` bytes each.
 *
 * gftbls layout as consumed here: 32 bytes per (dest, src) pair, a 16-entry
 * low-nibble table followed by a 16-entry high-nibble table. The rows for
 * consecutive destinations are vlen * 32 bytes apart.
 *
 * Returns 0 on success, 1 (nothing written) when len < 16.
 *
 * Each strip is processed with vl = min(bytes remaining, VLMAX), so no byte
 * beyond `len` is read from a source or written to a destination, and every
 * table is loaded with vl = 16 so gftbls is never over-read when VLEN > 128.
 * Assumes VLEN >= 128 so that a 16-entry table fits in one vector register.
 *
 * Clobbers: t0-t6, a5-a7, v1-v15, v26, v27, vl/vtype.
 * s0-s3 are saved and restored on the stack.
 */

/* arguments */
#define x_len       a0  /* vector length */
#define x_vec       a1  /* number of source vectors (ie. data blocks) */
#define x_tbl       a2
#define x_src       a3
#define x_dest      a4

/* local variables */
#define x_vec_i     a7
#define x_ptr       t1
#define x_pos       t2

#define x_tbl1      t3
#define x_tbl2      t4
#define x_tbl3      t5
#define x_tbl4      t6
#define x_dest1     s0
#define x_dest2     s1
#define x_dest3     s2
#define x_dest4     s3
#define t_offset    a5  /* vlen * 32: distance between table rows */
#define x_tmp       a6  /* scratch: remaining bytes / address arithmetic */
#define x_vl        t0  /* bytes handled by the current strip */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3

#define v_dest1     v4
#define v_dest2     v5
#define v_dest3     v6
#define v_dest4     v7

#define v_gft1_lo   v8
#define v_gft1_hi   v9
#define v_gft2_lo   v10
#define v_gft2_hi   v11
#define v_gft3_lo   v12
#define v_gft3_hi   v13
#define v_gft4_lo   v14
#define v_gft4_hi   v15

#define v_tmp_lo    v26
#define v_tmp_hi    v27

gf_4vect_dot_prod_rvv:
    /* less than 16 bytes, return_fail */
    li          x_tmp, 16
    blt         x_len, x_tmp, .Lreturn_fail

    /* save callee-saved registers */
    addi        sp, sp, -32
    sd          s0, 0(sp)
    sd          s1, 8(sp)
    sd          s2, 16(sp)
    sd          s3, 24(sp)

    li          x_pos, 0
    slli        t_offset, x_vec, 5
    ld          x_dest1, 0(x_dest)
    ld          x_dest2, 8(x_dest)
    ld          x_dest3, 16(x_dest)
    ld          x_dest4, 24(x_dest)

/* Loop 1: x_len, vector length */
.Lloop_rvv_vl:
    /* stop once every byte is done; otherwise size this strip */
    sub         x_tmp, x_len, x_pos
    blez        x_tmp, .Lreturn_pass
    vsetvli     x_vl, x_tmp, e8, m1, ta, ma

    /* Clear destination vectors */
    vmv.v.i     v_dest1, 0
    vmv.v.i     v_dest2, 0
    vmv.v.i     v_dest3, 0
    vmv.v.i     v_dest4, 0

    /* Reset table pointers */
    mv          x_tbl1, x_tbl
    add         x_tbl2, x_tbl1, t_offset
    add         x_tbl3, x_tbl2, t_offset
    add         x_tbl4, x_tbl3, t_offset

    /* Loop 2: x_vec, number of source vectors (ie. data blocks) */
    li          x_vec_i, 0
.Lloop_rvv_vl_vects:
    /* Load gf_table's: exactly 16 bytes each, independent of VLEN */
    vsetivli    zero, 16, e8, m1, ta, ma
    vle8.v      v_gft1_lo, (x_tbl1)
    addi        x_tbl1, x_tbl1, 16
    vle8.v      v_gft1_hi, (x_tbl1)
    addi        x_tbl1, x_tbl1, 16
    vle8.v      v_gft2_lo, (x_tbl2)
    addi        x_tbl2, x_tbl2, 16
    vle8.v      v_gft2_hi, (x_tbl2)
    addi        x_tbl2, x_tbl2, 16
    vle8.v      v_gft3_lo, (x_tbl3)
    addi        x_tbl3, x_tbl3, 16
    vle8.v      v_gft3_hi, (x_tbl3)
    addi        x_tbl3, x_tbl3, 16
    vle8.v      v_gft4_lo, (x_tbl4)
    addi        x_tbl4, x_tbl4, 16
    vle8.v      v_gft4_hi, (x_tbl4)
    addi        x_tbl4, x_tbl4, 16

    /* back to the strip length for the data path */
    vsetvli     zero, x_vl, e8, m1, ta, ma

    /* Load source data: src[x_vec_i] + x_pos */
    slli        x_tmp, x_vec_i, 3
    add         x_tmp, x_src, x_tmp
    ld          x_ptr, 0(x_tmp)
    add         x_ptr, x_ptr, x_pos
    vle8.v      v_src, (x_ptr)

    /* Split 4-bit lo; 4-bit hi */
    vand.vi     v_src_lo, v_src, 0x0F
    vsrl.vi     v_src_hi, v_src, 4

    /* dest 1: table lookup (GF multiply), then xor (GF add) */
    vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
    vxor.vv     v_dest1, v_dest1, v_tmp_lo
    vxor.vv     v_dest1, v_dest1, v_tmp_hi

    /* dest 2 */
    vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
    vxor.vv     v_dest2, v_dest2, v_tmp_lo
    vxor.vv     v_dest2, v_dest2, v_tmp_hi

    /* dest 3 */
    vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
    vxor.vv     v_dest3, v_dest3, v_tmp_lo
    vxor.vv     v_dest3, v_dest3, v_tmp_hi

    /* dest 4 */
    vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi
    vxor.vv     v_dest4, v_dest4, v_tmp_lo
    vxor.vv     v_dest4, v_dest4, v_tmp_hi

    /* Move to next source vector */
    addi        x_vec_i, x_vec_i, 1

    /* Check if we have processed all vectors */
    blt         x_vec_i, x_vec, .Lloop_rvv_vl_vects

    /* Store destination data (vl is the strip length here) */
    vse8.v      v_dest1, (x_dest1)
    vse8.v      v_dest2, (x_dest2)
    vse8.v      v_dest3, (x_dest3)
    vse8.v      v_dest4, (x_dest4)
    add         x_dest1, x_dest1, x_vl
    add         x_dest2, x_dest2, x_vl
    add         x_dest3, x_dest3, x_vl
    add         x_dest4, x_dest4, x_vl

    /* Increment position */
    add         x_pos, x_pos, x_vl
    j           .Lloop_rvv_vl

.Lreturn_pass:
    /* restore callee-saved registers */
    ld          s0, 0(sp)
    ld          s1, 8(sp)
    ld          s2, 16(sp)
    ld          s3, 24(sp)
    addi        sp, sp, 32
    li          a0, 0
    ret

.Lreturn_fail:
    li          a0, 1
    ret

.size gf_4vect_dot_prod_rvv, .-gf_4vect_dot_prod_rvv

#endif
|
||||||
189
erasure_code/riscv64/gf_4vect_mad_rvv.S
Normal file
189
erasure_code/riscv64/gf_4vect_mad_rvv.S
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_4vect_mad_rvv
.type gf_4vect_mad_rvv, @function

/*
 * int gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
 *                      unsigned char *src, unsigned char **dest);
 *
 * GF(2^8) multiply-accumulate: for each of four destinations,
 * dest[k][i] ^= gf_mul(coefficient k, src[i]) for i in [0, len).
 *
 * Table addressing as done here: the 32-byte table (16 low-nibble entries
 * followed by 16 high-nibble entries) for destination 0 is at
 * gftbls + vec_i * 32; each further destination is vec * 32 bytes on.
 *
 * Returns 0 on success, 1 (nothing written) when len < 16.
 *
 * Tables are loaded once with vl = 16 so gftbls is never over-read when
 * VLEN > 128. Each strip then runs with vl = min(bytes remaining, VLMAX),
 * so nothing beyond `len` is read or written.
 * Assumes VLEN >= 128 so that a 16-entry table fits in one vector register.
 *
 * Clobbers: t0-t6, a3, a6, v1-v17, vl/vtype. Leaf, no stack use.
 */

/* arguments */
#define x_len       a0
#define x_vec       a1
#define x_vec_i     a2
#define x_tbl       a3
#define x_src       a4
#define x_dest      a5

/* returns */
#define w_ret       a0

/* local variables */
#define x_pos       t0
#define x_dest1     t1
#define x_dest2     t2
#define x_dest3     t3
#define x_dest4     t4
#define x_tmp       t5  /* scratch: remaining bytes / addresses */
#define x_vl        t6  /* bytes handled by the current strip */
#define x_stride    a6  /* vec * 32: distance between table rows */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3
#define v_dest1     v4
#define v_tmp_lo    v5
#define v_tmp_hi    v6
#define v_gft1_lo   v7
#define v_gft1_hi   v8
#define v_gft2_lo   v9
#define v_gft2_hi   v10
#define v_gft3_lo   v11
#define v_gft3_hi   v12
#define v_gft4_lo   v13
#define v_gft4_hi   v14
#define v_dest2     v15
#define v_dest3     v16
#define v_dest4     v17

gf_4vect_mad_rvv:
    /* less than 16 bytes, return_fail */
    li          x_tmp, 16
    blt         x_len, x_tmp, .Lreturn_fail

    /* tables are 16 bytes each, independent of VLEN */
    vsetivli    zero, 16, e8, m1, ta, ma
    slli        x_stride, x_vec, 5

    /* load table 1 */
    slli        x_tmp, x_vec_i, 5
    add         x_tbl, x_tbl, x_tmp
    vle8.v      v_gft1_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft1_hi, (x_tmp)

    /* load table 2 */
    add         x_tbl, x_tbl, x_stride
    vle8.v      v_gft2_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft2_hi, (x_tmp)

    /* load table 3 */
    add         x_tbl, x_tbl, x_stride
    vle8.v      v_gft3_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft3_hi, (x_tmp)

    /* load table 4 */
    add         x_tbl, x_tbl, x_stride
    vle8.v      v_gft4_lo, (x_tbl)
    addi        x_tmp, x_tbl, 16
    vle8.v      v_gft4_hi, (x_tmp)

    /* load dest pointers */
    ld          x_dest1, 0(x_dest)
    ld          x_dest2, 8(x_dest)
    ld          x_dest3, 16(x_dest)
    ld          x_dest4, 24(x_dest)

    li          x_pos, 0

.Llooprvv_vl:
    /* stop once every byte is done; otherwise size this strip */
    sub         x_tmp, x_len, x_pos
    blez        x_tmp, .Lreturn_pass
    vsetvli     x_vl, x_tmp, e8, m1, ta, ma

    /* load src data */
    add         x_tmp, x_src, x_pos
    vle8.v      v_src, (x_tmp)

    /* split 4-bit lo; 4-bit hi */
    vand.vi     v_src_lo, v_src, 0x0F
    vsrl.vi     v_src_hi, v_src, 4

    /* load dest data */
    add         x_tmp, x_dest1, x_pos
    vle8.v      v_dest1, (x_tmp)
    add         x_tmp, x_dest2, x_pos
    vle8.v      v_dest2, (x_tmp)
    add         x_tmp, x_dest3, x_pos
    vle8.v      v_dest3, (x_tmp)
    add         x_tmp, x_dest4, x_pos
    vle8.v      v_dest4, (x_tmp)

    /* dest1 */
    /* table indexing, ie. gf(2^8) multiplication */
    vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
    /* exclusive or, ie. gf(2^8) add */
    vxor.vv     v_dest1, v_tmp_lo, v_dest1
    vxor.vv     v_dest1, v_tmp_hi, v_dest1

    /* dest2 */
    vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
    vxor.vv     v_dest2, v_tmp_lo, v_dest2
    vxor.vv     v_dest2, v_tmp_hi, v_dest2

    /* dest3 */
    vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
    vxor.vv     v_dest3, v_tmp_lo, v_dest3
    vxor.vv     v_dest3, v_tmp_hi, v_dest3

    /* dest4 */
    vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi
    vxor.vv     v_dest4, v_tmp_lo, v_dest4
    vxor.vv     v_dest4, v_tmp_hi, v_dest4

    /* store dest data */
    add         x_tmp, x_dest1, x_pos
    vse8.v      v_dest1, (x_tmp)
    add         x_tmp, x_dest2, x_pos
    vse8.v      v_dest2, (x_tmp)
    add         x_tmp, x_dest3, x_pos
    vse8.v      v_dest3, (x_tmp)
    add         x_tmp, x_dest4, x_pos
    vse8.v      v_dest4, (x_tmp)

    /* increment position */
    add         x_pos, x_pos, x_vl
    j           .Llooprvv_vl

.Lreturn_pass:
    li          w_ret, 0
    ret

.Lreturn_fail:
    li          w_ret, 1
    ret

.size gf_4vect_mad_rvv, .-gf_4vect_mad_rvv

#endif
|
||||||
242
erasure_code/riscv64/gf_5vect_dot_prod_rvv.S
Normal file
242
erasure_code/riscv64/gf_5vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_5vect_dot_prod_rvv
.type gf_5vect_dot_prod_rvv, @function

/*
 * int gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
 *                           unsigned char **src, unsigned char **dest);
 *
 * GF(2^8) dot product of `vlen` source buffers into five destination
 * buffers, `len` bytes each.
 *
 * gftbls layout as consumed here: 32 bytes per (dest, src) pair, a 16-entry
 * low-nibble table followed by a 16-entry high-nibble table. The rows for
 * consecutive destinations are vlen * 32 bytes apart.
 *
 * Returns 0 on success, 1 (nothing written) when len < 16.
 *
 * Each strip is processed with vl = min(bytes remaining, VLMAX), so no byte
 * beyond `len` is read from a source or written to a destination, and every
 * table is loaded with vl = 16 so gftbls is never over-read when VLEN > 128.
 * Assumes VLEN >= 128 so that a 16-entry table fits in one vector register.
 *
 * Clobbers: t0-t6, a5-a7, v1-v18, v26, v27, vl/vtype.
 * s0-s5 are saved and restored on the stack.
 */

/* arguments */
#define x_len       a0  /* vector length */
#define x_vec       a1  /* number of source vectors (ie. data blocks) */
#define x_tbl       a2
#define x_src       a3
#define x_dest      a4

/* local variables */
#define x_vec_i     a7
#define x_ptr       t1
#define x_pos       t2
#define x_tbl1      t3
#define x_tbl2      t4
#define x_tbl3      t5
#define x_tbl4      t6

#define x_tbl5      s0
#define x_dest1     s1
#define x_dest2     s2
#define x_dest3     s3
#define x_dest4     s4
#define x_dest5     s5

#define t_offset    t0  /* vlen * 32: distance between table rows */
#define x_vl        a5  /* bytes handled by the current strip */
#define x_tmp       a6  /* scratch: remaining bytes / address arithmetic */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3
#define v_dest1     v4
#define v_gft1_lo   v5
#define v_gft1_hi   v6
#define v_gft2_lo   v7
#define v_gft2_hi   v8
#define v_gft3_lo   v9
#define v_gft3_hi   v10
#define v_gft4_lo   v11
#define v_gft4_hi   v12
#define v_gft5_lo   v13
#define v_gft5_hi   v14
#define v_dest2     v15
#define v_dest3     v16
#define v_dest4     v17
#define v_dest5     v18

#define v_tmp_lo    v26
#define v_tmp_hi    v27

gf_5vect_dot_prod_rvv:
    /* less than 16 bytes, return_fail */
    li          x_tmp, 16
    blt         x_len, x_tmp, .Lreturn_fail

    /* save s0-s5 */
    addi        sp, sp, -48
    sd          s0, 0(sp)
    sd          s1, 8(sp)
    sd          s2, 16(sp)
    sd          s3, 24(sp)
    sd          s4, 32(sp)
    sd          s5, 40(sp)

    /* Initialize position and the loop-invariant table row stride */
    li          x_pos, 0
    slli        t_offset, x_vec, 5

    /* Load destination pointers */
    ld          x_dest1, 0(x_dest)
    ld          x_dest2, 8(x_dest)
    ld          x_dest3, 16(x_dest)
    ld          x_dest4, 24(x_dest)
    ld          x_dest5, 32(x_dest)

/* Loop 1: x_len, vector length */
.Llooprvv_vl:
    /* stop once every byte is done; otherwise size this strip */
    sub         x_tmp, x_len, x_pos
    blez        x_tmp, .Lreturn_pass
    vsetvli     x_vl, x_tmp, e8, m1, ta, ma

    /* Clear destination vectors */
    vmv.v.i     v_dest1, 0
    vmv.v.i     v_dest2, 0
    vmv.v.i     v_dest3, 0
    vmv.v.i     v_dest4, 0
    vmv.v.i     v_dest5, 0

    /* Reset table pointers */
    mv          x_tbl1, x_tbl
    add         x_tbl2, x_tbl1, t_offset
    add         x_tbl3, x_tbl2, t_offset
    add         x_tbl4, x_tbl3, t_offset
    add         x_tbl5, x_tbl4, t_offset

    /* Loop 2: x_vec, number of source vectors (ie. data blocks) */
    li          x_vec_i, 0
.Llooprvv_vl_vects:
    /* Load gf_table's: exactly 16 bytes each, independent of VLEN */
    vsetivli    zero, 16, e8, m1, ta, ma
    vle8.v      v_gft1_lo, (x_tbl1)
    addi        x_tbl1, x_tbl1, 16
    vle8.v      v_gft1_hi, (x_tbl1)
    addi        x_tbl1, x_tbl1, 16

    vle8.v      v_gft2_lo, (x_tbl2)
    addi        x_tbl2, x_tbl2, 16
    vle8.v      v_gft2_hi, (x_tbl2)
    addi        x_tbl2, x_tbl2, 16

    vle8.v      v_gft3_lo, (x_tbl3)
    addi        x_tbl3, x_tbl3, 16
    vle8.v      v_gft3_hi, (x_tbl3)
    addi        x_tbl3, x_tbl3, 16

    vle8.v      v_gft4_lo, (x_tbl4)
    addi        x_tbl4, x_tbl4, 16
    vle8.v      v_gft4_hi, (x_tbl4)
    addi        x_tbl4, x_tbl4, 16

    vle8.v      v_gft5_lo, (x_tbl5)
    addi        x_tbl5, x_tbl5, 16
    vle8.v      v_gft5_hi, (x_tbl5)
    addi        x_tbl5, x_tbl5, 16

    /* back to the strip length for the data path */
    vsetvli     zero, x_vl, e8, m1, ta, ma

    /* Load source data: src[x_vec_i] + x_pos */
    slli        x_tmp, x_vec_i, 3
    add         x_tmp, x_src, x_tmp
    ld          x_ptr, 0(x_tmp)
    add         x_ptr, x_ptr, x_pos
    vle8.v      v_src, (x_ptr)

    /* Split 4-bit lo; 4-bit hi */
    vand.vi     v_src_lo, v_src, 0x0F
    vsrl.vi     v_src_hi, v_src, 4

    /* Move to next source vector */
    addi        x_vec_i, x_vec_i, 1

    /* dest 1: table lookup (GF multiply), then xor (GF add) */
    vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
    vxor.vv     v_dest1, v_dest1, v_tmp_lo
    vxor.vv     v_dest1, v_dest1, v_tmp_hi

    /* dest 2 */
    vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
    vxor.vv     v_dest2, v_dest2, v_tmp_lo
    vxor.vv     v_dest2, v_dest2, v_tmp_hi

    /* dest 3 */
    vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
    vxor.vv     v_dest3, v_dest3, v_tmp_lo
    vxor.vv     v_dest3, v_dest3, v_tmp_hi

    /* dest 4 */
    vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi
    vxor.vv     v_dest4, v_dest4, v_tmp_lo
    vxor.vv     v_dest4, v_dest4, v_tmp_hi

    /* dest 5 */
    vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo
    vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi
    vxor.vv     v_dest5, v_dest5, v_tmp_lo
    vxor.vv     v_dest5, v_dest5, v_tmp_hi

    /* Check if we have processed all vectors */
    blt         x_vec_i, x_vec, .Llooprvv_vl_vects

    /* Store destination data (vl is the strip length here) */
    vse8.v      v_dest1, (x_dest1)
    vse8.v      v_dest2, (x_dest2)
    vse8.v      v_dest3, (x_dest3)
    vse8.v      v_dest4, (x_dest4)
    vse8.v      v_dest5, (x_dest5)

    /* Advance destination pointers past the bytes just written */
    add         x_dest1, x_dest1, x_vl
    add         x_dest2, x_dest2, x_vl
    add         x_dest3, x_dest3, x_vl
    add         x_dest4, x_dest4, x_vl
    add         x_dest5, x_dest5, x_vl

    /* Increment position */
    add         x_pos, x_pos, x_vl
    j           .Llooprvv_vl

.Lreturn_pass:
    /* Restore callee-saved registers */
    ld          s0, 0(sp)
    ld          s1, 8(sp)
    ld          s2, 16(sp)
    ld          s3, 24(sp)
    ld          s4, 32(sp)
    ld          s5, 40(sp)
    addi        sp, sp, 48

    li          a0, 0
    ret

.Lreturn_fail:
    li          a0, 1
    ret

.size gf_5vect_dot_prod_rvv, .-gf_5vect_dot_prod_rvv

#endif
|
||||||
214
erasure_code/riscv64/gf_5vect_mad_rvv.S
Normal file
214
erasure_code/riscv64/gf_5vect_mad_rvv.S
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_5vect_mad_rvv
.type gf_5vect_mad_rvv, @function

/*
 * int gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
 *                      unsigned char *src, unsigned char **dest);
 *
 * GF(2^8) multiply-and-add of one source buffer into five destination
 * buffers.  For every byte position i in [0, len) and every output
 * j in [0, 5):
 *
 *     dest[j][i] ^= tbl_j_lo[src[i] & 0x0F] ^ tbl_j_hi[src[i] >> 4]
 *
 * where table j starts at gftbls + 32 * (vec_i + j * vec); its first
 * 16 bytes are the low-nibble products, the next 16 the high-nibble ones.
 *
 * Returns 0 on success, 1 (nothing touched) when len < 16.
 *
 * Only caller-saved registers are used, so no stack frame is needed.
 * Exactly len bytes of src and of each dest[j] are accessed, and exactly
 * 32 bytes of each table, independent of the hardware vector length.
 */

/* arguments */
#define x_len       a0      /* bytes to process */
#define x_vec       a1      /* number of source vectors (table row stride) */
#define x_vec_i     a2      /* index of this source vector */
#define x_tbl       a3      /* gftbls; walked from table to table */
#define x_src       a4      /* source buffer */
#define x_dest      a5      /* array of five destination pointers */

/* returns */
#define w_ret       a0

/* local variables */
#define x_pos       t0      /* byte offset of the current pass */
#define x_dest1     t1
#define x_dest2     t2
#define x_dest3     t3
#define x_dest4     t4
#define x_dest5     t5
#define x_tmp       t6      /* scratch: constants and effective addresses */
#define x_stride    a6      /* setup only: byte distance between tables */
#define x_rem       a6      /* loop only: bytes still to process */
#define x_vl        a7      /* bytes handled by the current pass */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3
#define v_dest1     v4
#define v_tmp_lo    v5
#define v_tmp_hi    v6
#define v_gft1_lo   v7
#define v_gft1_hi   v8
#define v_gft2_lo   v9
#define v_gft2_hi   v10
#define v_gft3_lo   v11
#define v_gft3_hi   v12
#define v_gft4_lo   v13
#define v_gft4_hi   v14
#define v_gft5_lo   v15
#define v_gft5_hi   v16
#define v_dest2     v19
#define v_dest3     v20
#define v_dest4     v21
#define v_dest5     v22

gf_5vect_mad_rvv:
	/* less than 16 bytes, return_fail */
	li	x_tmp, 16
	blt	x_len, x_tmp, .return_fail

	/*
	 * Each table half is exactly 16 bytes.  Load the tables with vl = 16
	 * so that wider vector units do not read past the end of gftbls.
	 * vrgather indexes are always < 16, so elements beyond 16 are unused.
	 */
	vsetivli	zero, 16, e8, m1, ta, ma

	/* Load table 1: gftbls + vec_i * 32 */
	slli	x_stride, x_vec_i, 5
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft1_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft1_hi, (x_tmp)

	/* Every following table is vec * 32 bytes further on */
	slli	x_stride, x_vec, 5

	/* Load table 2 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft2_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft2_hi, (x_tmp)

	/* Load table 3 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft3_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft3_hi, (x_tmp)

	/* Load table 4 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft4_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft4_hi, (x_tmp)

	/* Load table 5 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft5_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft5_hi, (x_tmp)

	/* Load destination pointers */
	ld	x_dest1, 0(x_dest)
	ld	x_dest2, 8(x_dest)
	ld	x_dest3, 16(x_dest)
	ld	x_dest4, 24(x_dest)
	ld	x_dest5, 32(x_dest)

	li	x_pos, 0

	/* len >= 16 here, so the first pass always has work to do */
.Llooprvv_vl:
	/* vl = min(bytes remaining, VLMAX): the last pass is a short one */
	sub	x_rem, x_len, x_pos
	vsetvli	x_vl, x_rem, e8, m1, ta, ma

	/* Load source data */
	add	x_tmp, x_src, x_pos
	vle8.v	v_src, (x_tmp)

	/* Split 4-bit lo; 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* Load the current destination contents (accumulators) */
	add	x_tmp, x_dest1, x_pos
	vle8.v	v_dest1, (x_tmp)
	add	x_tmp, x_dest2, x_pos
	vle8.v	v_dest2, (x_tmp)
	add	x_tmp, x_dest3, x_pos
	vle8.v	v_dest3, (x_tmp)
	add	x_tmp, x_dest4, x_pos
	vle8.v	v_dest4, (x_tmp)
	add	x_tmp, x_dest5, x_pos
	vle8.v	v_dest5, (x_tmp)

	/* dest1 ^= lo1[src_lo] ^ hi1[src_hi] */
	vrgather.vv	v_tmp_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft1_hi, v_src_hi
	vxor.vv	v_dest1, v_tmp_lo, v_dest1
	vxor.vv	v_dest1, v_tmp_hi, v_dest1

	/* dest2 */
	vrgather.vv	v_tmp_lo, v_gft2_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft2_hi, v_src_hi
	vxor.vv	v_dest2, v_tmp_lo, v_dest2
	vxor.vv	v_dest2, v_tmp_hi, v_dest2

	/* dest3 */
	vrgather.vv	v_tmp_lo, v_gft3_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft3_hi, v_src_hi
	vxor.vv	v_dest3, v_tmp_lo, v_dest3
	vxor.vv	v_dest3, v_tmp_hi, v_dest3

	/* dest4 */
	vrgather.vv	v_tmp_lo, v_gft4_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft4_hi, v_src_hi
	vxor.vv	v_dest4, v_tmp_lo, v_dest4
	vxor.vv	v_dest4, v_tmp_hi, v_dest4

	/* dest5 */
	vrgather.vv	v_tmp_lo, v_gft5_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft5_hi, v_src_hi
	vxor.vv	v_dest5, v_tmp_lo, v_dest5
	vxor.vv	v_dest5, v_tmp_hi, v_dest5

	/* Store destination data (only vl bytes each) */
	add	x_tmp, x_dest1, x_pos
	vse8.v	v_dest1, (x_tmp)
	add	x_tmp, x_dest2, x_pos
	vse8.v	v_dest2, (x_tmp)
	add	x_tmp, x_dest3, x_pos
	vse8.v	v_dest3, (x_tmp)
	add	x_tmp, x_dest4, x_pos
	vse8.v	v_dest4, (x_tmp)
	add	x_tmp, x_dest5, x_pos
	vse8.v	v_dest5, (x_tmp)

	/* Increment position by what was actually processed */
	add	x_pos, x_pos, x_vl
	blt	x_pos, x_len, .Llooprvv_vl

.return_pass:
	li	w_ret, 0
	ret

.return_fail:
	li	w_ret, 1
	ret

#endif
|
||||||
273
erasure_code/riscv64/gf_6vect_dot_prod_rvv.S
Normal file
273
erasure_code/riscv64/gf_6vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,273 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_6vect_dot_prod_rvv
.type gf_6vect_dot_prod_rvv, @function

/*
 * int gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
 *                           unsigned char **src, unsigned char **dest);
 *
 * GF(2^8) dot product of vlen source buffers into six destination
 * buffers.  For every byte position i in [0, len) and output j in [0, 6):
 *
 *     dest[j][i] = XOR over k in [0, vlen) of
 *                  tbl_jk_lo[src[k][i] & 0x0F] ^ tbl_jk_hi[src[k][i] >> 4]
 *
 * where the table for (j, k) starts at gftbls + 32 * (j * vlen + k); its
 * first 16 bytes are the low-nibble products, the next 16 the
 * high-nibble ones.
 *
 * Returns 0 on success, 1 (nothing touched) when len < 16.
 *
 * Exactly len bytes of every src[k] / dest[j] are accessed and exactly
 * 32 bytes of every table, independent of the hardware vector length.
 * The source loop is a do-while: src[0] is read even when vlen <= 0.
 */

/* arguments */
#define x_len       a0      /* vector length */
#define x_vec       a1      /* number of source vectors (ie. data blocks) */
#define x_tbl       a2      /* gftbls */
#define x_src       a3      /* src */
#define x_dest      a4      /* dest */

/* local variables */
#define x_vl        a5      /* bytes handled by the current pass */
#define x_tmp       a6      /* scratch: remaining bytes, src slot address */
#define x_vec_i     a7      /* loop counter for vectors */
#define x_stride    t0      /* vlen * 32: distance between output table rows */
#define x_ptr       t1      /* pointer to current src */
#define x_pos       t2      /* position in vector */
#define x_tbl1      t3      /* table pointer 1 */
#define x_tbl2      t4      /* table pointer 2 */
#define x_tbl3      t5      /* table pointer 3 */
#define x_tbl4      t6      /* table pointer 4 */
#define x_tbl5      s0      /* table pointer 5 */
#define x_tbl6      s1      /* table pointer 6 */
#define x_dest1     s2      /* dest pointer 1 */
#define x_dest2     s3      /* dest pointer 2 */
#define x_dest3     s4      /* dest pointer 3 */
#define x_dest4     s5      /* dest pointer 4 */
#define x_dest5     s6      /* dest pointer 5 */
#define x_dest6     s7      /* dest pointer 6 */

/* vector registers */
#define v_src       v1      /* source vector */
#define v_src_lo    v2      /* low 4 bits of source */
#define v_src_hi    v3      /* high 4 bits of source */
#define v_dest1     v4      /* destination vector 1 */
#define v_dest2     v5      /* destination vector 2 */
#define v_dest3     v6      /* destination vector 3 */
#define v_dest4     v7      /* destination vector 4 */
#define v_dest5     v8      /* destination vector 5 */
#define v_dest6     v9      /* destination vector 6 */
#define v_gft1_lo   v10     /* gf table 1 low */
#define v_gft1_hi   v11     /* gf table 1 high */
#define v_gft2_lo   v12     /* gf table 2 low */
#define v_gft2_hi   v13     /* gf table 2 high */
#define v_gft3_lo   v14     /* gf table 3 low */
#define v_gft3_hi   v15     /* gf table 3 high */
#define v_gft4_lo   v16     /* gf table 4 low */
#define v_gft4_hi   v17     /* gf table 4 high */
#define v_gft5_lo   v18     /* gf table 5 low */
#define v_gft5_hi   v19     /* gf table 5 high */
#define v_gft6_lo   v20     /* gf table 6 low */
#define v_gft6_hi   v21     /* gf table 6 high */
#define v_tmp_lo    v26     /* low-nibble lookup result */
#define v_tmp_hi    v27     /* high-nibble lookup result */

gf_6vect_dot_prod_rvv:
	/* less than 16 bytes, return_fail (before any stack use) */
	li	x_tmp, 16
	blt	x_len, x_tmp, .return_fail

	/* save callee-saved registers; 64 bytes keeps sp 16-byte aligned */
	addi	sp, sp, -64
	sd	s0, 0(sp)
	sd	s1, 8(sp)
	sd	s2, 16(sp)
	sd	s3, 24(sp)
	sd	s4, 32(sp)
	sd	s5, 40(sp)
	sd	s6, 48(sp)
	sd	s7, 56(sp)

	/* initialize position */
	li	x_pos, 0

	/* loop-invariant distance between the table rows of two outputs */
	slli	x_stride, x_vec, 5

	/* load destination pointers */
	ld	x_dest1, 0(x_dest)
	ld	x_dest2, 8(x_dest)
	ld	x_dest3, 16(x_dest)
	ld	x_dest4, 24(x_dest)
	ld	x_dest5, 32(x_dest)
	ld	x_dest6, 40(x_dest)

	/* Loop 1: over the buffer; len >= 16 so the first pass always runs */
.Llooprvv_vl:
	/* vl = min(bytes remaining, VLMAX): the last pass is a short one */
	sub	x_tmp, x_len, x_pos
	vsetvli	x_vl, x_tmp, e8, m1, ta, ma

	/* clear destination accumulators */
	vmv.v.i	v_dest1, 0
	vmv.v.i	v_dest2, 0
	vmv.v.i	v_dest3, 0
	vmv.v.i	v_dest4, 0
	vmv.v.i	v_dest5, 0
	vmv.v.i	v_dest6, 0

	/* rewind table pointers: row j starts at x_tbl + j * vlen * 32 */
	mv	x_tbl1, x_tbl
	add	x_tbl2, x_tbl1, x_stride
	add	x_tbl3, x_tbl2, x_stride
	add	x_tbl4, x_tbl3, x_stride
	add	x_tbl5, x_tbl4, x_stride
	add	x_tbl6, x_tbl5, x_stride

	/* initialize vector loop counter */
	li	x_vec_i, 0

	/* Loop 2: over the source vectors */
.Llooprvv_vl_vects:
	/*
	 * Table halves are exactly 16 bytes: load them with vl = 16 so wider
	 * vector units do not read past the tables and a short last pass
	 * still gets complete tables.  Each pointer ends 32 bytes further on,
	 * i.e. at the table for the next source vector.
	 */
	vsetivli	zero, 16, e8, m1, ta, ma

	vle8.v	v_gft1_lo, (x_tbl1)
	addi	x_tbl1, x_tbl1, 16
	vle8.v	v_gft1_hi, (x_tbl1)
	addi	x_tbl1, x_tbl1, 16

	vle8.v	v_gft2_lo, (x_tbl2)
	addi	x_tbl2, x_tbl2, 16
	vle8.v	v_gft2_hi, (x_tbl2)
	addi	x_tbl2, x_tbl2, 16

	vle8.v	v_gft3_lo, (x_tbl3)
	addi	x_tbl3, x_tbl3, 16
	vle8.v	v_gft3_hi, (x_tbl3)
	addi	x_tbl3, x_tbl3, 16

	vle8.v	v_gft4_lo, (x_tbl4)
	addi	x_tbl4, x_tbl4, 16
	vle8.v	v_gft4_hi, (x_tbl4)
	addi	x_tbl4, x_tbl4, 16

	vle8.v	v_gft5_lo, (x_tbl5)
	addi	x_tbl5, x_tbl5, 16
	vle8.v	v_gft5_hi, (x_tbl5)
	addi	x_tbl5, x_tbl5, 16

	vle8.v	v_gft6_lo, (x_tbl6)
	addi	x_tbl6, x_tbl6, 16
	vle8.v	v_gft6_hi, (x_tbl6)
	addi	x_tbl6, x_tbl6, 16

	/* back to the data length of this pass (x_vl <= VLMAX, so vl = x_vl) */
	vsetvli	zero, x_vl, e8, m1, ta, ma

	/* load source data: src[vec_i] + pos */
	slli	x_tmp, x_vec_i, 3
	add	x_tmp, x_src, x_tmp
	ld	x_ptr, 0(x_tmp)
	add	x_ptr, x_ptr, x_pos
	vle8.v	v_src, (x_ptr)

	/* split 4-bit lo; 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* dest 1 ^= lo1[src_lo] ^ hi1[src_hi] */
	vrgather.vv	v_tmp_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft1_hi, v_src_hi
	vxor.vv	v_dest1, v_dest1, v_tmp_lo
	vxor.vv	v_dest1, v_dest1, v_tmp_hi

	/* dest 2 */
	vrgather.vv	v_tmp_lo, v_gft2_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft2_hi, v_src_hi
	vxor.vv	v_dest2, v_dest2, v_tmp_lo
	vxor.vv	v_dest2, v_dest2, v_tmp_hi

	/* dest 3 */
	vrgather.vv	v_tmp_lo, v_gft3_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft3_hi, v_src_hi
	vxor.vv	v_dest3, v_dest3, v_tmp_lo
	vxor.vv	v_dest3, v_dest3, v_tmp_hi

	/* dest 4 */
	vrgather.vv	v_tmp_lo, v_gft4_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft4_hi, v_src_hi
	vxor.vv	v_dest4, v_dest4, v_tmp_lo
	vxor.vv	v_dest4, v_dest4, v_tmp_hi

	/* dest 5 */
	vrgather.vv	v_tmp_lo, v_gft5_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft5_hi, v_src_hi
	vxor.vv	v_dest5, v_dest5, v_tmp_lo
	vxor.vv	v_dest5, v_dest5, v_tmp_hi

	/* dest 6 */
	vrgather.vv	v_tmp_lo, v_gft6_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft6_hi, v_src_hi
	vxor.vv	v_dest6, v_dest6, v_tmp_lo
	vxor.vv	v_dest6, v_dest6, v_tmp_hi

	/* next source vector */
	addi	x_vec_i, x_vec_i, 1

	/* check if we have processed all vectors */
	blt	x_vec_i, x_vec, .Llooprvv_vl_vects

	/* store destination data (only vl bytes each) */
	vse8.v	v_dest1, (x_dest1)
	vse8.v	v_dest2, (x_dest2)
	vse8.v	v_dest3, (x_dest3)
	vse8.v	v_dest4, (x_dest4)
	vse8.v	v_dest5, (x_dest5)
	vse8.v	v_dest6, (x_dest6)

	/* advance destination pointers by what was actually written */
	add	x_dest1, x_dest1, x_vl
	add	x_dest2, x_dest2, x_vl
	add	x_dest3, x_dest3, x_vl
	add	x_dest4, x_dest4, x_vl
	add	x_dest5, x_dest5, x_vl
	add	x_dest6, x_dest6, x_vl

	/* increment position */
	add	x_pos, x_pos, x_vl
	blt	x_pos, x_len, .Llooprvv_vl

.return_pass:
	/* restore callee-saved registers */
	ld	s0, 0(sp)
	ld	s1, 8(sp)
	ld	s2, 16(sp)
	ld	s3, 24(sp)
	ld	s4, 32(sp)
	ld	s5, 40(sp)
	ld	s6, 48(sp)
	ld	s7, 56(sp)
	addi	sp, sp, 64

	li	a0, 0
	ret

.return_fail:
	li	a0, 1
	ret

#endif
|
||||||
241
erasure_code/riscv64/gf_6vect_mad_rvv.S
Normal file
241
erasure_code/riscv64/gf_6vect_mad_rvv.S
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_6vect_mad_rvv
.type gf_6vect_mad_rvv, @function

/*
 * int gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
 *                      unsigned char *src, unsigned char **dest);
 *
 * GF(2^8) multiply-and-add of one source buffer into six destination
 * buffers.  For every byte position i in [0, len) and every output
 * j in [0, 6):
 *
 *     dest[j][i] ^= tbl_j_lo[src[i] & 0x0F] ^ tbl_j_hi[src[i] >> 4]
 *
 * where table j starts at gftbls + 32 * (vec_i + j * vec); its first
 * 16 bytes are the low-nibble products, the next 16 the high-nibble ones.
 *
 * Returns 0 on success, 1 (nothing touched) when len < 16.
 *
 * Only caller-saved registers are used, so no stack frame is needed.
 * Exactly len bytes of src and of each dest[j] are accessed, and exactly
 * 32 bytes of each table, independent of the hardware vector length.
 */

/* arguments */
#define x_len       a0      /* bytes to process */
#define x_vec       a1      /* number of source vectors (table row stride) */
#define x_vec_i     a2      /* index of this source vector */
#define x_tbl       a3      /* gftbls; walked from table to table */
#define x_src       a4      /* source buffer */
#define x_dest      a5      /* array of six destination pointers */

/* returns */
#define w_ret       a0

/* local variables */
#define x_pos       t0      /* byte offset of the current pass */
#define x_dest1     t1
#define x_dest2     t2
#define x_dest3     t3
#define x_dest4     t4
#define x_dest5     t5
#define x_dest6     t6
#define x_stride    a6      /* setup only: byte distance between tables */
#define x_vl        a6      /* loop only: bytes handled by the current pass */
#define x_tmp       a7      /* scratch: constants, remaining bytes, addresses */

/* vectors */
#define v_src       v1
#define v_src_lo    v2
#define v_src_hi    v3
#define v_dest1     v4
#define v_tmp_lo    v5
#define v_tmp_hi    v6
#define v_gft1_lo   v7
#define v_gft1_hi   v8
#define v_gft2_lo   v9
#define v_gft2_hi   v10
#define v_gft3_lo   v11
#define v_gft3_hi   v12
#define v_gft4_lo   v13
#define v_gft4_hi   v14
#define v_gft5_lo   v15
#define v_gft5_hi   v16
#define v_gft6_lo   v17
#define v_gft6_hi   v18
#define v_dest2     v19
#define v_dest3     v20
#define v_dest4     v21
#define v_dest5     v22
#define v_dest6     v23

gf_6vect_mad_rvv:
	/* less than 16 bytes, return_fail */
	li	x_tmp, 16
	blt	x_len, x_tmp, .return_fail

	/*
	 * Each table half is exactly 16 bytes.  Load the tables with vl = 16
	 * so that wider vector units do not read past the end of gftbls.
	 * vrgather indexes are always < 16, so elements beyond 16 are unused.
	 */
	vsetivli	zero, 16, e8, m1, ta, ma

	/* Load table 1: gftbls + vec_i * 32 */
	slli	x_stride, x_vec_i, 5
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft1_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft1_hi, (x_tmp)

	/* Every following table is vec * 32 bytes further on */
	slli	x_stride, x_vec, 5

	/* Load table 2 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft2_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft2_hi, (x_tmp)

	/* Load table 3 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft3_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft3_hi, (x_tmp)

	/* Load table 4 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft4_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft4_hi, (x_tmp)

	/* Load table 5 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft5_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft5_hi, (x_tmp)

	/* Load table 6 */
	add	x_tbl, x_tbl, x_stride
	vle8.v	v_gft6_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft6_hi, (x_tmp)

	/* Load destination pointers */
	ld	x_dest1, 0(x_dest)
	ld	x_dest2, 8(x_dest)
	ld	x_dest3, 16(x_dest)
	ld	x_dest4, 24(x_dest)
	ld	x_dest5, 32(x_dest)
	ld	x_dest6, 40(x_dest)

	li	x_pos, 0

	/* len >= 16 here, so the first pass always has work to do */
.Llooprvv_vl:
	/* vl = min(bytes remaining, VLMAX): the last pass is a short one */
	sub	x_tmp, x_len, x_pos
	vsetvli	x_vl, x_tmp, e8, m1, ta, ma

	/* Load source data */
	add	x_tmp, x_src, x_pos
	vle8.v	v_src, (x_tmp)

	/* Split 4-bit lo; 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* Load the current destination contents (accumulators) */
	add	x_tmp, x_dest1, x_pos
	vle8.v	v_dest1, (x_tmp)
	add	x_tmp, x_dest2, x_pos
	vle8.v	v_dest2, (x_tmp)
	add	x_tmp, x_dest3, x_pos
	vle8.v	v_dest3, (x_tmp)
	add	x_tmp, x_dest4, x_pos
	vle8.v	v_dest4, (x_tmp)
	add	x_tmp, x_dest5, x_pos
	vle8.v	v_dest5, (x_tmp)
	add	x_tmp, x_dest6, x_pos
	vle8.v	v_dest6, (x_tmp)

	/* dest1 ^= lo1[src_lo] ^ hi1[src_hi] */
	vrgather.vv	v_tmp_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft1_hi, v_src_hi
	vxor.vv	v_dest1, v_tmp_lo, v_dest1
	vxor.vv	v_dest1, v_tmp_hi, v_dest1

	/* dest2 */
	vrgather.vv	v_tmp_lo, v_gft2_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft2_hi, v_src_hi
	vxor.vv	v_dest2, v_tmp_lo, v_dest2
	vxor.vv	v_dest2, v_tmp_hi, v_dest2

	/* dest3 */
	vrgather.vv	v_tmp_lo, v_gft3_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft3_hi, v_src_hi
	vxor.vv	v_dest3, v_tmp_lo, v_dest3
	vxor.vv	v_dest3, v_tmp_hi, v_dest3

	/* dest4 */
	vrgather.vv	v_tmp_lo, v_gft4_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft4_hi, v_src_hi
	vxor.vv	v_dest4, v_tmp_lo, v_dest4
	vxor.vv	v_dest4, v_tmp_hi, v_dest4

	/* dest5 */
	vrgather.vv	v_tmp_lo, v_gft5_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft5_hi, v_src_hi
	vxor.vv	v_dest5, v_tmp_lo, v_dest5
	vxor.vv	v_dest5, v_tmp_hi, v_dest5

	/* dest6 */
	vrgather.vv	v_tmp_lo, v_gft6_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft6_hi, v_src_hi
	vxor.vv	v_dest6, v_tmp_lo, v_dest6
	vxor.vv	v_dest6, v_tmp_hi, v_dest6

	/* Store destination data (only vl bytes each) */
	add	x_tmp, x_dest1, x_pos
	vse8.v	v_dest1, (x_tmp)
	add	x_tmp, x_dest2, x_pos
	vse8.v	v_dest2, (x_tmp)
	add	x_tmp, x_dest3, x_pos
	vse8.v	v_dest3, (x_tmp)
	add	x_tmp, x_dest4, x_pos
	vse8.v	v_dest4, (x_tmp)
	add	x_tmp, x_dest5, x_pos
	vse8.v	v_dest5, (x_tmp)
	add	x_tmp, x_dest6, x_pos
	vse8.v	v_dest6, (x_tmp)

	/* Increment position by what was actually processed */
	add	x_pos, x_pos, x_vl
	blt	x_pos, x_len, .Llooprvv_vl

.return_pass:
	li	w_ret, 0
	ret

.return_fail:
	li	w_ret, 1
	ret

#endif
|
||||||
299
erasure_code/riscv64/gf_7vect_dot_prod_rvv.S
Normal file
299
erasure_code/riscv64/gf_7vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,299 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
.global gf_7vect_dot_prod_rvv
|
||||||
|
.type gf_7vect_dot_prod_rvv, @function
|
||||||
|
|
||||||
|
/* void gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* arguments */
#define x_len		a0	/* vector length in bytes */
#define x_vec		a1	/* number of source vectors (ie. data blocks) */
#define x_tbl		a2	/* GF tables: 7 groups, each x_vec * 32 bytes */
#define x_src		a3	/* array of x_vec source pointers */
#define x_dest		a4	/* array of 7 destination pointers */

/* local variables */
#define x_vl		t0	/* bytes handled by the current strip */
#define x_vec_i		t1	/* index of the current source vector */
#define x_ptr		t2	/* address of the current source strip */
#define x_pos		t3	/* byte offset of the current strip */
#define x_tmp		a5	/* scratch */

#define x_tbl1		t4
#define x_tbl2		t5
#define x_tbl3		t6
#define x_tbl4		s8
#define x_tbl5		a6
#define x_tbl6		a7
#define x_tbl7		s0

#define x_dest1		s1
#define x_dest2		s2
#define x_dest3		s3
#define x_dest4		s4
#define x_dest5		s5
#define x_dest6		s6
#define x_dest7		s7

/* vectors */
#define v_src		v1	/* source bytes */
#define v_src_lo	v2	/* low nibble of every source byte */
#define v_src_hi	v3	/* high nibble of every source byte */

#define v_dest1		v4	/* accumulator for destination 1 */
#define v_dest2		v5	/* accumulator for destination 2 */
#define v_dest3		v6	/* accumulator for destination 3 */
#define v_dest4		v7	/* accumulator for destination 4 */
#define v_dest5		v8	/* accumulator for destination 5 */
#define v_dest6		v9	/* accumulator for destination 6 */
#define v_dest7		v10	/* accumulator for destination 7 */

#define v_gft1_lo	v11	/* GF table 1, low-nibble products */
#define v_gft1_hi	v12	/* GF table 1, high-nibble products */
#define v_gft2_lo	v13	/* GF table 2, low-nibble products */
#define v_gft2_hi	v14	/* GF table 2, high-nibble products */
#define v_gft3_lo	v15	/* GF table 3, low-nibble products */
#define v_gft3_hi	v16	/* GF table 3, high-nibble products */
#define v_gft4_lo	v17	/* GF table 4, low-nibble products */
#define v_gft4_hi	v18	/* GF table 4, high-nibble products */
#define v_gft5_lo	v19	/* GF table 5, low-nibble products */
#define v_gft5_hi	v20	/* GF table 5, high-nibble products */
#define v_gft6_lo	v21	/* GF table 6, low-nibble products */
#define v_gft6_hi	v22	/* GF table 6, high-nibble products */
#define v_gft7_lo	v23	/* GF table 7, low-nibble products */
#define v_gft7_hi	v24	/* GF table 7, high-nibble products */

#define v_tmp_lo	v26	/* low-nibble lookup result */
#define v_tmp_hi	v27	/* high-nibble lookup result */

/*
 * gf_7vect_dot_prod_rvv
 *
 * In:    a0 = len, a1 = vec, a2 = gftbls, a3 = src, a4 = dest
 * Out:   a0 = 0 on success, 1 when len < 16 (nothing is written)
 * Stack: 80 bytes, used to preserve s0-s8
 *
 * For every byte position, each of the 7 destinations receives the XOR
 * over all sources of lo_table[src & 0x0f] ^ hi_table[src >> 4].
 *
 * The data is strip-mined: vsetvli is given the number of bytes that
 * remain, so the last strip never reads or writes past len.  The table
 * loads always use vl = 16 because each half table is exactly 16 bytes.
 */
gf_7vect_dot_prod_rvv:
	/* less than 16 bytes, return_fail (stack not touched yet) */
	li	x_tmp, 16
	blt	x_len, x_tmp, .Lreturn_fail

	/* save callee-saved registers */
	addi	sp, sp, -80
	sd	s0, 0(sp)
	sd	s1, 8(sp)
	sd	s2, 16(sp)
	sd	s3, 24(sp)
	sd	s4, 32(sp)
	sd	s5, 40(sp)
	sd	s6, 48(sp)
	sd	s7, 56(sp)
	sd	s8, 64(sp)

	/* initialize position */
	li	x_pos, 0

	/* load destination pointers */
	ld	x_dest1, 0(x_dest)
	ld	x_dest2, 8(x_dest)
	ld	x_dest3, 16(x_dest)
	ld	x_dest4, 24(x_dest)
	ld	x_dest5, 32(x_dest)
	ld	x_dest6, 40(x_dest)
	ld	x_dest7, 48(x_dest)

	/* Loop 1: x_len, vector length */
.Lloop_rvv_vl:
	/* check if we have processed all elements */
	bge	x_pos, x_len, .Lreturn_pass

	/* x_vl = min(bytes remaining, VLMAX) */
	sub	x_tmp, x_len, x_pos
	vsetvli	x_vl, x_tmp, e8, m1, ta, ma

	/* clear destination accumulators */
	vmv.v.i	v_dest1, 0
	vmv.v.i	v_dest2, 0
	vmv.v.i	v_dest3, 0
	vmv.v.i	v_dest4, 0
	vmv.v.i	v_dest5, 0
	vmv.v.i	v_dest6, 0
	vmv.v.i	v_dest7, 0

	/* reset table pointers; consecutive groups are x_vec * 32 bytes apart */
	mv	x_tbl1, x_tbl
	slli	x_tmp, x_vec, 5
	add	x_tbl2, x_tbl1, x_tmp
	add	x_tbl3, x_tbl2, x_tmp
	add	x_tbl4, x_tbl3, x_tmp
	add	x_tbl5, x_tbl4, x_tmp
	add	x_tbl6, x_tbl5, x_tmp
	add	x_tbl7, x_tbl6, x_tmp

	/* initialize vector loop counter; no sources means all-zero output */
	li	x_vec_i, 0
	blez	x_vec, .Lstore_dest

	/* Loop 2: x_vec, number of source vectors */
.Lloop_rvv_vl_vects:
	/* load gf_table's: 16 bytes each, independent of the strip length */
	vsetivli	zero, 16, e8, m1, ta, ma

	vle8.v	v_gft1_lo, (x_tbl1)
	addi	x_tbl1, x_tbl1, 16
	vle8.v	v_gft1_hi, (x_tbl1)
	addi	x_tbl1, x_tbl1, 16

	vle8.v	v_gft2_lo, (x_tbl2)
	addi	x_tbl2, x_tbl2, 16
	vle8.v	v_gft2_hi, (x_tbl2)
	addi	x_tbl2, x_tbl2, 16

	vle8.v	v_gft3_lo, (x_tbl3)
	addi	x_tbl3, x_tbl3, 16
	vle8.v	v_gft3_hi, (x_tbl3)
	addi	x_tbl3, x_tbl3, 16

	vle8.v	v_gft4_lo, (x_tbl4)
	addi	x_tbl4, x_tbl4, 16
	vle8.v	v_gft4_hi, (x_tbl4)
	addi	x_tbl4, x_tbl4, 16

	vle8.v	v_gft5_lo, (x_tbl5)
	addi	x_tbl5, x_tbl5, 16
	vle8.v	v_gft5_hi, (x_tbl5)
	addi	x_tbl5, x_tbl5, 16

	vle8.v	v_gft6_lo, (x_tbl6)
	addi	x_tbl6, x_tbl6, 16
	vle8.v	v_gft6_hi, (x_tbl6)
	addi	x_tbl6, x_tbl6, 16

	vle8.v	v_gft7_lo, (x_tbl7)
	addi	x_tbl7, x_tbl7, 16
	vle8.v	v_gft7_hi, (x_tbl7)
	addi	x_tbl7, x_tbl7, 16

	/* back to the strip length for the data path */
	vsetvli	zero, x_vl, e8, m1, ta, ma

	/* load source data: src[x_vec_i] + x_pos */
	slli	x_tmp, x_vec_i, 3
	add	x_tmp, x_src, x_tmp
	ld	x_ptr, 0(x_tmp)
	add	x_ptr, x_ptr, x_pos
	vle8.v	v_src, (x_ptr)

	/* split 4-bit lo; 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* GF multiplication and accumulation for dest1 */
	vrgather.vv	v_tmp_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft1_hi, v_src_hi
	vxor.vv	v_dest1, v_dest1, v_tmp_lo
	vxor.vv	v_dest1, v_dest1, v_tmp_hi

	/* GF multiplication and accumulation for dest2 */
	vrgather.vv	v_tmp_lo, v_gft2_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft2_hi, v_src_hi
	vxor.vv	v_dest2, v_dest2, v_tmp_lo
	vxor.vv	v_dest2, v_dest2, v_tmp_hi

	/* GF multiplication and accumulation for dest3 */
	vrgather.vv	v_tmp_lo, v_gft3_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft3_hi, v_src_hi
	vxor.vv	v_dest3, v_dest3, v_tmp_lo
	vxor.vv	v_dest3, v_dest3, v_tmp_hi

	/* GF multiplication and accumulation for dest4 */
	vrgather.vv	v_tmp_lo, v_gft4_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft4_hi, v_src_hi
	vxor.vv	v_dest4, v_dest4, v_tmp_lo
	vxor.vv	v_dest4, v_dest4, v_tmp_hi

	/* GF multiplication and accumulation for dest5 */
	vrgather.vv	v_tmp_lo, v_gft5_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft5_hi, v_src_hi
	vxor.vv	v_dest5, v_dest5, v_tmp_lo
	vxor.vv	v_dest5, v_dest5, v_tmp_hi

	/* GF multiplication and accumulation for dest6 */
	vrgather.vv	v_tmp_lo, v_gft6_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft6_hi, v_src_hi
	vxor.vv	v_dest6, v_dest6, v_tmp_lo
	vxor.vv	v_dest6, v_dest6, v_tmp_hi

	/* GF multiplication and accumulation for dest7 */
	vrgather.vv	v_tmp_lo, v_gft7_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft7_hi, v_src_hi
	vxor.vv	v_dest7, v_dest7, v_tmp_lo
	vxor.vv	v_dest7, v_dest7, v_tmp_hi

	/* increment x_vec_i */
	addi	x_vec_i, x_vec_i, 1
	blt	x_vec_i, x_vec, .Lloop_rvv_vl_vects

.Lstore_dest:
	/* store x_vl result bytes to every destination */
	vse8.v	v_dest1, (x_dest1)
	vse8.v	v_dest2, (x_dest2)
	vse8.v	v_dest3, (x_dest3)
	vse8.v	v_dest4, (x_dest4)
	vse8.v	v_dest5, (x_dest5)
	vse8.v	v_dest6, (x_dest6)
	vse8.v	v_dest7, (x_dest7)

	add	x_dest1, x_dest1, x_vl
	add	x_dest2, x_dest2, x_vl
	add	x_dest3, x_dest3, x_vl
	add	x_dest4, x_dest4, x_vl
	add	x_dest5, x_dest5, x_vl
	add	x_dest6, x_dest6, x_vl
	add	x_dest7, x_dest7, x_vl

	/* advance by the bytes actually processed */
	add	x_pos, x_pos, x_vl
	j	.Lloop_rvv_vl

.Lreturn_pass:
	/* restore callee-saved registers */
	ld	s0, 0(sp)
	ld	s1, 8(sp)
	ld	s2, 16(sp)
	ld	s3, 24(sp)
	ld	s4, 32(sp)
	ld	s5, 40(sp)
	ld	s6, 48(sp)
	ld	s7, 56(sp)
	ld	s8, 64(sp)
	addi	sp, sp, 80

	/* return success */
	li	a0, 0
	ret

.Lreturn_fail:
	li	a0, 1		/* return fail */
	ret

	.size	gf_7vect_dot_prod_rvv, .-gf_7vect_dot_prod_rvv
|
||||||
|
|
||||||
|
#endif
|
||||||
136
erasure_code/riscv64/gf_vect_dot_prod_rvv.S
Normal file
136
erasure_code/riscv64/gf_vect_dot_prod_rvv.S
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * RISC-V RVV implementation of gf_vect_dot_prod_rvv
 *
 * Arguments:
 *   a0: len    (vector length in bytes)
 *   a1: vlen   (number of source vectors)
 *   a2: gftbls (GF(2^8) tables, 32 bytes per source vector:
 *               16 low-nibble products followed by 16 high-nibble products)
 *   a3: src    (pointer to array of source vector pointers)
 *   a4: dest   (pointer to destination vector)
 *
 * Returns:
 *   a0: 0 on success, 1 when len < 16 (nothing is written)
 *
 * For every byte position, dest receives the XOR over all sources of
 * lo_table[src & 0x0f] ^ hi_table[src >> 4].
 *
 * The data is strip-mined: vsetvli is given the number of bytes that
 * remain, so the last strip never reads or writes past len.  The table
 * loads always use vl = 16 because each half table is exactly 16 bytes.
 */

#if HAVE_RVV
.text
.align 2

.global gf_vect_dot_prod_rvv
.type gf_vect_dot_prod_rvv, @function

/* arguments */
#define x_len		a0
#define x_vec		a1	/* vlen; rescaled to vlen * 8 on entry */
#define x_tbl		a2
#define x_src		a3
#define x_dest		a4

/* local variables */
#define x_vec_i		t0	/* byte offset into src[] (index * 8) */
#define x_ptr		t1	/* address of the current source strip */
#define x_pos		t2	/* byte offset of the current strip */
#define x_tbl1		t3	/* table of the current source vector */
#define x_tmp		t4	/* scratch */
#define x_vl		t5	/* bytes handled by the current strip */

/* vectors */
#define v_src		v1	/* source bytes */
#define v_src_lo	v2	/* low nibble of every source byte */
#define v_src_hi	v3	/* high nibble of every source byte */
#define v_dest		v4	/* accumulator */
#define v_gft1_lo	v5	/* low-nibble product table */
#define v_gft1_hi	v6	/* high-nibble product table */
#define v_tmp_lo	v8	/* low-nibble lookup result */
#define v_tmp_hi	v9	/* high-nibble lookup result */

gf_vect_dot_prod_rvv:
	/* less than 16 bytes, return_fail */
	li	x_tmp, 16
	blt	x_len, x_tmp, .Lreturn_fail

	/* initialize position */
	li	x_pos, 0

	/* src[] holds 8-byte pointers: turn the count into an end offset */
	slli	x_vec, x_vec, 3

.Lloop_rvv_vl:
	/* check if we have processed all elements */
	bge	x_pos, x_len, .Lreturn_pass

	/* x_vl = min(bytes remaining, VLMAX) */
	sub	x_tmp, x_len, x_pos
	vsetvli	x_vl, x_tmp, e8, m1, ta, ma

	/* clear the accumulator and restart at the first source/table */
	vmv.v.i	v_dest, 0
	li	x_vec_i, 0
	mv	x_tbl1, x_tbl

	/* no sources means all-zero output */
	blez	x_vec, .Lstore_dest

.Lloop_rvv_vl_vects:
	/* load both 16-byte half tables, independent of the strip length */
	vsetivli	zero, 16, e8, m1, ta, ma
	vle8.v	v_gft1_lo, (x_tbl1)
	addi	x_tbl1, x_tbl1, 16
	vle8.v	v_gft1_hi, (x_tbl1)
	addi	x_tbl1, x_tbl1, 16

	/* back to the strip length for the data path */
	vsetvli	zero, x_vl, e8, m1, ta, ma

	/* load source data: src[vec_i] + pos */
	add	x_ptr, x_src, x_vec_i
	ld	x_ptr, 0(x_ptr)
	add	x_ptr, x_ptr, x_pos
	vle8.v	v_src, (x_ptr)

	/* next source pointer */
	addi	x_vec_i, x_vec_i, 8

	/* split 4-bit lo; 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* table indexing, ie. gf(2^8) multiplication */
	vrgather.vv	v_tmp_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp_hi, v_gft1_hi, v_src_hi

	/* exclusive or, ie. gf(2^8) add */
	vxor.vv	v_dest, v_dest, v_tmp_lo
	vxor.vv	v_dest, v_dest, v_tmp_hi

	blt	x_vec_i, x_vec, .Lloop_rvv_vl_vects

.Lstore_dest:
	/* store x_vl result bytes and advance by the same amount */
	vse8.v	v_dest, (x_dest)
	add	x_dest, x_dest, x_vl
	add	x_pos, x_pos, x_vl

	j	.Lloop_rvv_vl

.Lreturn_pass:
	li	a0, 0		/* return 0 (success) */
	ret

.Lreturn_fail:
	li	a0, 1		/* return 1 (failure) */
	ret

	.size	gf_vect_dot_prod_rvv, .-gf_vect_dot_prod_rvv

#endif
|
||||||
119
erasure_code/riscv64/gf_vect_mad_rvv.S
Normal file
119
erasure_code/riscv64/gf_vect_mad_rvv.S
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_vect_mad_rvv
.type gf_vect_mad_rvv, @function

/* gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
 *                 unsigned char *src, unsigned char *dest);
 *
 * For every byte position:
 *   dest ^= lo_table[src & 0x0f] ^ hi_table[src >> 4]
 * where the 32-byte table of source vec_i is at gftbls + vec_i * 32.
 * The vec argument (a1) is not read.
 *
 * Returns 0 on success, 1 when len < 16 (nothing is written).
 *
 * The data is strip-mined: vsetvli is given the number of bytes that
 * remain, so the last strip never reads or writes past len.  The table
 * loads use vl = 16 because each half table is exactly 16 bytes.
 */

/* arguments */
#define x_len		a0	/* counts down: bytes still to process */
#define x_vec_i		a2
#define x_tbl		a3
#define x_src		a4
#define x_dest		a5

/* returns */
#define w_ret		a0

/* local variables */
#define x_tmp		t1	/* scratch */
#define x_vl		t2	/* bytes handled by the current strip */

/* vectors */
#define v_src		v1
#define v_src_lo	v2
#define v_src_hi	v3
#define v_dest		v4
#define v_tmp1_lo	v5
#define v_tmp1_hi	v6
#define v_gft1_lo	v7
#define v_gft1_hi	v8

gf_vect_mad_rvv:
	/* less than 16 bytes, return_fail */
	li	x_tmp, 16
	blt	x_len, x_tmp, .Lreturn_fail

	/* x_tbl += x_vec_i * 2^5 */
	slli	x_tmp, x_vec_i, 5
	add	x_tbl, x_tbl, x_tmp

	/* Load gft1_lo and gft1_hi, 16 bytes each */
	vsetivli	zero, 16, e8, m1, ta, ma
	vle8.v	v_gft1_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft1_hi, (x_tmp)

.Lloop_rvv_vl:
	/* x_vl = min(bytes remaining, VLMAX); x_len > 0 is guaranteed here */
	vsetvli	x_vl, x_len, e8, m1, ta, ma

	/* load src data and the current dest data */
	vle8.v	v_src, (x_src)
	vle8.v	v_dest, (x_dest)

	/* split 4-bit lo; 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* table indexing, ie. gf(2^8) multiplication */
	/* RISC-V RVV does not have tbl instruction, use vrgather.vv */
	vrgather.vv	v_tmp1_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp1_hi, v_gft1_hi, v_src_hi

	/* exclusive or, ie. gf(2^8) add */
	vxor.vv	v_dest, v_tmp1_lo, v_dest
	vxor.vv	v_dest, v_tmp1_hi, v_dest

	/* store dest data */
	vse8.v	v_dest, (x_dest)

	/* advance by the bytes actually processed */
	add	x_src, x_src, x_vl
	add	x_dest, x_dest, x_vl
	sub	x_len, x_len, x_vl

	bnez	x_len, .Lloop_rvv_vl

.Lreturn_pass:
	li	w_ret, 0
	ret

.Lreturn_fail:
	li	w_ret, 1
	ret

	.size	gf_vect_mad_rvv, .-gf_vect_mad_rvv

#endif
|
||||||
114
erasure_code/riscv64/gf_vect_mul_rvv.S
Normal file
114
erasure_code/riscv64/gf_vect_mul_rvv.S
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
##################################################################
|
||||||
|
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of sanechips Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
#if HAVE_RVV
.text
.align 2

.global gf_vect_mul_rvv
.type gf_vect_mul_rvv, @function

/* Function arguments:
 *   a0: len   - Length of vector in bytes, must be a multiple of 32.
 *   a1: gftbl - Pointer to 32-byte array of pre-calculated constants:
 *               16 low-nibble products followed by 16 high-nibble products.
 *   a2: src   - Pointer to source data array.
 *   a3: dest  - Pointer to destination data array.
 * Returns:
 *   a0: 0 for success, 1 for failure (len is not a multiple of 32).
 *
 * For every byte position:
 *   dest = lo_table[src & 0x0f] ^ hi_table[src >> 4]
 *
 * The data is strip-mined: vsetvli is given the number of bytes that
 * remain, so no strip reads or writes past len.  The table loads use
 * vl = 16 because each half table is exactly 16 bytes.
 */

/* Local variables */
#define x_tmp		t1	/* scratch */
#define x_vl		t6	/* bytes handled by the current strip */
#define x_len		a0	/* counts down: bytes still to process */
#define x_tbl		a1
#define x_src		a2
#define x_dest		a3

/* Vector registers */
#define v_src		v1
#define v_src_lo	v2
#define v_src_hi	v3
#define v_dest		v4
#define v_tmp1_lo	v5
#define v_tmp1_hi	v6
#define v_gft1_lo	v7
#define v_gft1_hi	v8

gf_vect_mul_rvv:
	/* Check that len is a multiple of 32 bytes */
	andi	x_tmp, x_len, 0x1F
	bnez	x_tmp, .Lreturn_fail

	/* Nothing to process: succeed without touching src or dest */
	blez	x_len, .Lreturn_pass

	/* Load pre-calculated constants into v_gft1_lo and v_gft1_hi */
	vsetivli	zero, 16, e8, m1, ta, ma
	vle8.v	v_gft1_lo, (x_tbl)
	addi	x_tmp, x_tbl, 16
	vle8.v	v_gft1_hi, (x_tmp)

.Lloop_rvv_vl:
	/* x_vl = min(bytes remaining, VLMAX) */
	vsetvli	x_vl, x_len, e8, m1, ta, ma

	/* Load source data into v_src */
	vle8.v	v_src, (x_src)

	/* Split 4-bit lo and 4-bit hi */
	vand.vi	v_src_lo, v_src, 0x0F
	vsrl.vi	v_src_hi, v_src, 4

	/* Table lookup (GF multiplication) */
	vrgather.vv	v_tmp1_lo, v_gft1_lo, v_src_lo
	vrgather.vv	v_tmp1_hi, v_gft1_hi, v_src_hi

	/* XOR (GF addition) */
	vxor.vv	v_dest, v_tmp1_hi, v_tmp1_lo

	/* Store result to destination */
	vse8.v	v_dest, (x_dest)

	/* Advance by the bytes actually processed */
	add	x_src, x_src, x_vl
	add	x_dest, x_dest, x_vl
	sub	x_len, x_len, x_vl

	/* Check if we have processed all bytes */
	bnez	x_len, .Lloop_rvv_vl

.Lreturn_pass:
	li	a0, 0
	ret

.Lreturn_fail:
	li	a0, 1
	ret

	.size	gf_vect_mul_rvv, .-gf_vect_mul_rvv

#endif
|
||||||
Reference in New Issue
Block a user