erasure_code: optimize RVV implementation

The ISA-L erasure-code routines have been rewritten using RVV vector instructions and the minimal
multiplication table, giving a performance improvement of more than 10x over the existing implementation.

Signed-off-by: Shuo Lv <lv.shuo@sanechips.com.cn>
Author: lvshuo
Date: 2025-06-03 15:54:11 +08:00
Committed by: Pablo de Lara
Parent: f2883f24fd
Commit: d414b2702a
27 changed files with 3137 additions and 536 deletions
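For context, the new kernels drop the old log/antilog (gff_base/gflog_base) lookups in favour of the 32-byte-per-coefficient multiplication tables produced by ec_init_tables(): each source byte is split into its low and high 4-bit halves, two 16-entry table lookups are made, and the results are XORed, which maps directly onto vrgather.vv in the assembly below. A minimal scalar sketch of that inner loop, assuming the usual ISA-L gftbls layout (the helper names here are illustrative only, not part of the commit):

/*
 * Scalar reference of the split-nibble table lookup that the RVV kernels
 * vectorize. Assumes 32 bytes of gftbls per coefficient: 16 low-nibble
 * products followed by 16 high-nibble products.
 */
static unsigned char
gf_mul_byte(const unsigned char *tbl32, unsigned char b)
{
        /* coefficient times low nibble XOR coefficient times high nibble */
        return tbl32[b & 0x0F] ^ tbl32[16 + (b >> 4)];
}

static void
gf_vect_dot_prod_ref(int len, int k, const unsigned char *gftbls,
                     unsigned char **src, unsigned char *dest)
{
        for (int i = 0; i < len; i++) {
                unsigned char s = 0;
                for (int j = 0; j < k; j++)
                        s ^= gf_mul_byte(&gftbls[j * 32], src[j][i]);
                dest[i] = s;        /* GF(2^8) dot product of column i */
        }
}

The assembly files below perform the same two lookups per source block with vrgather.vv over the 16-byte table halves, accumulate the XOR across all k sources, and store one full vector register of dest bytes per pass.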

View File

@@ -152,7 +152,7 @@ v2.32
- Added new RVV xor_gen, pq_gen implementations.
* Erasure coding improvements:
- Added new RVV ec_encode_data, gf_vect_dot_prod, gf_vect_mul implementations.
- Added new RVV ec_encode_data, ec_encode_data_update, gf_vect_mad, gf_vect_dot_prod, gf_vect_mul implementations.
* Zero-memory detection improvements:
- Added new RVV implementations.

View File

@@ -67,8 +67,13 @@ case "${CPU}" in
])],
[AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes],
[AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
)
if test "x$rvv" = "xyes"; then
CFLAGS+=" -march=rv64gcv"
CCASFLAGS+=" -march=rv64gcv"
fi
AC_MSG_RESULT([$rvv])
;;

View File

@@ -50,6 +50,9 @@
#define EFENCE_TEST_MIN_SIZE 16
#define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100
#if HAVE_RVV
#define EC_ALIGNED_ADDR
#endif
#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 0

View File

@@ -35,8 +35,13 @@
#include "test.h"
#ifndef ALIGN_SIZE
#if HAVE_RVV
#define EC_ALIGNED_ADDR
#define ALIGN_SIZE 32
#else
#define ALIGN_SIZE 16
#endif
#endif
// By default, test multibinary version
#ifndef FUNCTION_UNDER_TEST

View File

@@ -1,3 +1,31 @@
#########################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
########################################################################
# Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
#
@@ -28,8 +56,20 @@
########################################################################
lsrc_riscv64 += \
erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c \
erasure_code/riscv64/ec_riscv64_dispatcher.c \
erasure_code/riscv64/ec_multibinary_riscv64.S \
erasure_code/riscv64/ec_gf_vect_mul_rvv.S \
erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S \
erasure_code/riscv64/ec_encode_data_rvv.S
erasure_code/riscv64/ec_riscv64_highlevel_func.c \
erasure_code/riscv64/gf_vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_2vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_3vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_4vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_5vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_6vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_7vect_dot_prod_rvv.S \
erasure_code/riscv64/gf_vect_mad_rvv.S \
erasure_code/riscv64/gf_2vect_mad_rvv.S \
erasure_code/riscv64/gf_3vect_mad_rvv.S \
erasure_code/riscv64/gf_4vect_mad_rvv.S \
erasure_code/riscv64/gf_5vect_mad_rvv.S \
erasure_code/riscv64/gf_6vect_mad_rvv.S \
erasure_code/riscv64/gf_vect_mul_rvv.S

View File

@@ -1,154 +0,0 @@
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ISCAS nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#if HAVE_RVV
#include "ec_table.S"
.option arch, +v
.global ec_encode_data_rvv
.type ec_encode_data_rvv, %function
ec_encode_data_rvv:
blez a2, 3f
blez a0, 3f
lla t0, gff_base
lla t1, gflog_base
addi a3, a3, 1
vsetvli zero, a1, e8, mf2, ta, ma
vmv.v.i v20, 0
li t3, 32
mv a6, a0 // backup len
mv a7, a5 // backup dest
csrr t5, vlenb // vlen/8
srli t5, t5, 1 // mf2: vlen/16
blt t5, a1, slow // vlen/16(hardware) < vlen(software)
2:
li t2, 0 // l
vlse8.v v24, (a3), t3 // v[j*32+1]
vmsne.vi v12, v24, 0 // if v == 0
vluxei8.v v24, (t1), v24 // gflag_base[v[]]
ld a5, (a5)
1:
vsetvli zero, zero, e8, mf2, ta, ma
vle64.v v16, (a4) // src[j]
vluxei64.v v16, (t2), v16 // src[j][i]
vmsne.vi v0, v16, 0 // if src == 0
vmand.mm v0, v0, v12 // if src == 0 || v == 0
vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]]
vwaddu.vv v8, v16, v24, v0.t
vmv.v.i v16, 0
vsetvli zero, zero, e8, mf2, ta, mu
vluxei16.v v16, (t0), v8, v0.t // gff_base[i]
vxor.vv v20, v16, v20
vmv.s.x v8, zero
vredxor.vs v8, v20, v8
vmv.x.s t5, v8
addi a0, a0, -1 // len
sb t5, (a5) // dest[0][i]
addi t2, t2, 1 // src[j][i]
vmv.v.i v20, 0
addi a5, a5, 1 // dest[i]
bnez a0, 1b
addi a2, a2, -1 // l(dests)
addi a7, a7, 8
mv a0, a6 // restore len
mv a5, a7 // update unsigned char **dest
slli t5, a1, 5 // += vlen * 32
add a3, a3, t5
bnez a2, 2b
ret
slow:
addi sp, sp, -16
sd s2, 0(sp)
sd s3, 8(sp)
mv s3, a4 // src
mv s2, a3 // v
mv t4, a1 // backup vlen
2:
li t2, 0 // i < len
ld a5, (a5)
1:
vsetvli t6, a1, e8, mf2, ta, ma
vle64.v v16, (a4) // src[j]
vluxei64.v v16, (t2), v16 // src[j][i]
vlse8.v v24, (a3), t3 // v[j*32+1]
vmsne.vi v12, v24, 0 // if v == 0
vmsne.vi v0, v16, 0 // if src == 0
vmand.mm v0, v0, v12 // if src == 0 || v == 0
vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]]
vluxei8.v v24, (t1), v24 // gflag_base[v[]]
vwaddu.vv v8, v16, v24, v0.t
vmv.v.i v16, 0
vsetvli zero, zero, e8, mf2, ta, mu
vluxei16.v v16, (t0), v8, v0.t // gff_base[i]
vxor.vv v20, v16, v20
sub a1, a1, t6
slli t5, t6, 5
add a3, a3, t5 // v += 32 * vlen
slli t5, t6, 3
add a4, a4, t5 // src += 8 * vlen
bnez a1, 1b // for (j = 0; j < vlen; j++)
vsetvli zero, t4, e8, mf2, ta, ma
vmv.s.x v8, zero
vredxor.vs v8, v20, v8
vmv.x.s t5, v8
addi a0, a0, -1 // len
sb t5, (a5) // dest[0][i]
addi t2, t2, 1 // src[j][i]
vmv.v.i v20, 0
mv a1, t4 // restore vlen
mv a3, s2 // restore v
mv a4, s3 // restore src
addi a5, a5, 1 // dest[i]
bnez a0, 1b // for (i = 0; i < len; i++)
addi a2, a2, -1 // l(dests)
addi a7, a7, 8 // for (l = 0; l < dests; l++)
mv a0, a6 // restore len
mv a5, a7
slli t5, t4, 5
add a3, a3, t5 // v += vlen * 32
mv s2, a3
bnez a2, 2b // for (l = 0; l < dests; l++) {
ld s2, 0(sp)
ld s3, 8(sp)
addi sp, sp, 16
3:
ret
#endif

View File

@@ -1,120 +0,0 @@
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ISCAS nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#if HAVE_RVV
#include "ec_table.S"
.option arch, +v
.global gf_vect_dot_prod_rvv
.type gf_vect_dot_prod_rvv, %function
gf_vect_dot_prod_rvv:
blez a0, 3f
lla t0, gff_base
lla t1, gflog_base
addi a2, a2, 1
vsetvli zero, a1, e8, mf2, ta, ma
vmv.v.i v20, 0
li t2, 0
li t3, 32
csrr t5, vlenb // vlen/8
srli t5, t5, 1 // mf2: vlen/16
blt t5, a1, slow // vlen/16(hardware) < vlen(software)
vlse8.v v24, (a2), t3 // v[j*32+1]
vmsne.vi v12, v24, 0 // if v == 0
vluxei8.v v24, (t1), v24 // gflag_base[v[]]
1:
vsetvli zero, zero, e8, mf2, ta, ma
vle64.v v16, (a3) // src[j]
vluxei64.v v16, (t2), v16 // src[j][i]
vmsne.vi v0, v16, 0 // if src == 0
vmand.mm v0, v0, v12 // if src == 0 || v == 0
vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]]
vwaddu.vv v8, v16, v24, v0.t
vmv.v.i v16, 0
vsetvli zero, zero, e8, mf2, ta, mu
vluxei16.v v16, (t0), v8, v0.t // gff_base[i]
vxor.vv v20, v16, v20
vmv.s.x v8, zero
vredxor.vs v8, v20, v8
vmv.x.s t5, v8
addi a0, a0, -1 // len
sb t5, (a4)
addi t2, t2, 1 // src[j][i]
vmv.v.i v20, 0
addi a4, a4, 1 // dest[i]
bnez a0, 1b
ret
slow:
mv a7, a3 // src
mv a6, a2 // v
mv t4, a1 // vlen
1:
vsetvli t6, a1, e8, mf2, ta, ma
vle64.v v16, (a3)
vluxei64.v v16, (t2), v16 // src[j][i]
vlse8.v v24, (a2), t3 // v[j*32+1]
vmsne.vi v0, v16, 0 // if src == 0
vmsne.vi v12, v24, 0 // if v == 0
vmand.mm v0, v0, v12
vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]]
vluxei8.v v24, (t1), v24, v0.t // gflag_base[v[]]
vwaddu.vv v8, v16, v24, v0.t
vmv.v.i v16, 0
vsetvli zero, zero, e8, mf2, ta, mu
vluxei16.v v16, (t0), v8, v0.t // gff_base[i]
vxor.vv v20, v16, v20
slli t5, t6, 5
add a2, a2, t5 // v += 32 * vlen
slli t5, t6, 3
add a3, a3, t5 // src += 8 * vlen
sub a1, a1, t6 // vlen
bnez a1, 1b // for (j = 0; j < vlen; j++)
vsetvli zero, t4, e8, mf2, ta, mu
vmv.s.x v8, zero
vredxor.vs v8, v20, v8
vmv.x.s t5, v8
addi a0, a0, -1 // len
mv a3, a7 // src
mv a2, a6 // v
mv a1, t4 // vlen
addi t2, t2, 1 // i
sb t5, (a4)
vmv.v.i v20, 0
addi a4, a4, 1 // dest[i]
bnez a0, 1b // for (i = 0; i < len; i++) {
3:
ret
#endif

View File

@@ -1,76 +0,0 @@
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ISCAS nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#if HAVE_RVV
#include "ec_table.S"
.option arch, +v
.global gf_vect_mul_rvv
.type gf_vect_mul_rvv, %function
gf_vect_mul_rvv:
li t4, 32
rem t4, a0, t4
bnez t4, ret1 // (len % 32) != 0
lla t0, gff_base
lla t1, gflog_base
lbu t2, 1(a1) // unsigned char c = a[1];
beqz t2, 2f
add t2, t1, t2 // &gflog_base[c]
lbu t2, (t2) // gflog_base[c]
1:
vsetvli t6, a0, e8, m4, ta, ma
vle8.v v16, (a2) // src
vmsne.vi v0, v16, 0 // if b == 0
vluxei8.v v16, (t1), v16, v0.t // gflag_base[b]
vwaddu.vx v8, v16, t2, v0.t
vmv.v.i v16, 0
vluxei16.v v16, (t0), v8, v0.t // gff_base[i]
vse8.v v16, (a3)
add a2, a2, t6
add a3, a3, t6
sub a0, a0, t6
bnez a0, 1b
ret
2:
vsetvli t6, a0, e8, m8, ta, ma
vmv.v.i v0, 0
3:
vsetvli t6, a0, e8, m8, ta, ma
vse8.v v0, (a3)
add a3, a3, t6
sub a0, a0, t6
bnez a0, 3b
ret
ret1:
li a0, -1
ret
#endif

View File

@@ -1,3 +1,31 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
@@ -29,16 +57,9 @@
#include "riscv64_multibinary.h"
#if HAVE_RVV
mbin_interface gf_vect_mul
mbin_interface gf_vect_dot_prod
mbin_interface ec_encode_data
#else
mbin_interface_base gf_vect_mul gf_vect_mul_base
mbin_interface_base gf_vect_dot_prod gf_vect_dot_prod_base
mbin_interface_base ec_encode_data ec_encode_data_base
#endif
mbin_interface_base ec_init_tables ec_init_tables_base
mbin_interface_base ec_encode_data_update ec_encode_data_update_base
mbin_interface_base gf_vect_mad gf_vect_mad_base
mbin_interface ec_encode_data
mbin_interface gf_vect_mul
mbin_interface gf_vect_dot_prod
mbin_interface gf_vect_mad
mbin_interface ec_encode_data_update
mbin_interface ec_init_tables

View File

@@ -1,78 +0,0 @@
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ISCAS nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "riscv64_multibinary.h"
extern int
gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern int
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern void
gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest);
extern void
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
unsigned char *dest);
extern void
ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
unsigned char **dest);
extern void
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
unsigned char **dest);
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{
#if HAVE_RVV
const unsigned long hwcap = getauxval(AT_HWCAP);
if (hwcap & HWCAP_RV('V'))
return gf_vect_mul_rvv;
else
#endif
return gf_vect_mul_base;
}
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{
#if HAVE_RVV
const unsigned long hwcap = getauxval(AT_HWCAP);
if (hwcap & HWCAP_RV('V'))
return gf_vect_dot_prod_rvv;
else
#endif
return gf_vect_dot_prod_base;
}
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{
#if HAVE_RVV
const unsigned long hwcap = getauxval(AT_HWCAP);
if (hwcap & HWCAP_RV('V'))
return ec_encode_data_rvv;
else
#endif
return ec_encode_data_base;
}

View File

@@ -0,0 +1,147 @@
/**************************************************************
Copyright (c) 2025 sanechips Technologies Co., Ltd.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of sanechips Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ISCAS nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "riscv64_multibinary.h"
extern void
gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
extern void
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
unsigned char *dest);
extern void
gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
unsigned char *dest);
extern void
ec_encode_data_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls, unsigned char *data,
unsigned char **coding);
extern void
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
unsigned char **dest);
extern int
gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern int
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
extern void
ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding);
extern void
ec_encode_data_update_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
unsigned char **dest);
extern void
ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls);
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{
#if HAVE_RVV
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_RV('V'))
return gf_vect_dot_prod_rvv;
#endif
return gf_vect_dot_prod_base;
}
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
{
#if HAVE_RVV
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_RV('V'))
return gf_vect_mad_rvv;
#endif
return gf_vect_mad_base;
}
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{
#if HAVE_RVV
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_RV('V'))
return ec_encode_data_rvv;
#endif
return ec_encode_data_base;
}
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
{
#if HAVE_RVV
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_RV('V'))
return ec_encode_data_update_rvv;
#endif
return ec_encode_data_update_base;
}
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{
#if HAVE_RVV
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_RV('V'))
return gf_vect_mul_rvv;
#endif
return gf_vect_mul_base;
}
DEFINE_INTERFACE_DISPATCHER(ec_init_tables) { return ec_init_tables_base; }

View File

@@ -0,0 +1,188 @@
/**************************************************************
Copyright (c) 2025 sanechips Technologies Co., Ltd.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of sanechips Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#if HAVE_RVV
#include "erasure_code.h"
/*external function*/
/* RVV */
extern void
gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
extern void
gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_8vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
extern void
gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void
ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}
while (rows > 11) {
gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding);
g_tbls += 6 * k * 32;
coding += 6;
rows -= 6;
}
switch (rows) {
case 11:
/* 7 + 4 */
gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding);
g_tbls += 7 * k * 32;
coding += 7;
gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 10:
/* 6 + 4 */
gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding);
g_tbls += 6 * k * 32;
coding += 6;
gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 9:
/* 5 + 4 */
gf_5vect_dot_prod_rvv(len, k, g_tbls, data, coding);
g_tbls += 5 * k * 32;
coding += 5;
gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 8:
/* 4 + 4 */
gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 7:
gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 6:
gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 5:
gf_5vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 4:
gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 3:
gf_3vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 2:
gf_2vect_dot_prod_rvv(len, k, g_tbls, data, coding);
break;
case 1:
gf_vect_dot_prod_rvv(len, k, g_tbls, data, *coding);
break;
default:
break;
}
}
void
ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
if (len < 16) {
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
return;
}
while (rows > 6) {
gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
g_tbls += 6 * k * 32;
coding += 6;
rows -= 6;
}
switch (rows) {
case 6:
gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
break;
case 5:
gf_5vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
break;
case 4:
gf_4vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
break;
case 3:
gf_3vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
break;
case 2:
gf_2vect_mad_rvv(len, k, vec_i, g_tbls, data, coding);
break;
case 1:
gf_vect_mad_rvv(len, k, vec_i, g_tbls, data, *coding);
break;
default:
break;
}
}
#endif

View File

@@ -1,88 +0,0 @@
/**********************************************************************
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of ISCAS nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
// Reference: https://git.ffmpeg.org/gitweb/ffmpeg.git/commit/746f1ff36ac0d232687820fbde4e4efc79093af7
.macro const sym, align=3, relocate=0
.if \relocate
.pushsection .data.rel.ro
.else
.pushsection .rodata
.endif
.align \align
\sym:
.macro endconst
.size \sym, . - \sym
.popsection
.purgem endconst
.endm
.endm
const gff_base
.rept 2
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13
.byte 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30
.byte 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee
.byte 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2
.byte 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89
.byte 0x0f, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1
.byte 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93
.byte 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda
.byte 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4
.byte 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6
.byte 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b
.byte 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32
.byte 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2
.byte 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, 0x12
.byte 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16
.byte 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e
.endr
.byte 0x01
endconst
const gflog_base
.byte 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7
.byte 0x4b, 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08
.byte 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0
.byte 0x12, 0x82, 0x45, 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78
.byte 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25
.byte 0xb3, 0x10, 0x91, 0x22, 0x88, 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2
.byte 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b
.byte 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b
.byte 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63
.byte 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8
.byte 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, 0x37, 0x3f, 0xd1, 0x5b, 0x95
.byte 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab
.byte 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, 0x1f, 0x2d, 0x43
.byte 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1
.byte 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, 0xcb
.byte 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7
.byte 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58
.byte 0xaf
endconst

View File

@@ -0,0 +1,161 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_2vect_dot_prod_rvv
.type gf_2vect_dot_prod_rvv, @function
/* void gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
/* arguments */
#define x_len a0 /* vector length */
#define x_vec a1 /* number of source vectors (ie. data blocks) */
#define x_tbl a2
#define x_src a3
#define x_dest a4
/* local variables */
#define x_vec_i t0
#define x_ptr t1
#define x_pos t2
#define x_tbl1 t3
#define x_tbl2 t4
#define x_dest1 t5
#define x_dest2 a7
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_gft1_lo v5
#define v_gft1_hi v6
#define v_gft2_lo v7
#define v_gft2_hi v8
#define v_dest2 v9
gf_2vect_dot_prod_rvv:
/* less than 16 bytes, return_fail */
li t6, 16
blt x_len, t6, .return_fail
vsetvli a5, x0, e8, m1 /* Set vector length to maximum */
li x_pos, 0
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
/* Loop 1: x_len, vector length */
.Llooprvv_vl:
bge x_pos, x_len, .return_pass
li x_vec_i, 0 /* clear x_vec_i */
ld x_ptr, 0(x_src) /* x_ptr: src base addr. */
vmv.v.i v_dest1, 0 /* clear v_dest1 */
vmv.v.i v_dest2, 0 /* clear v_dest2 */
/* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */
mv x_tbl1, x_tbl /* reset x_tbl1 */
slli t6, x_vec, 5
add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
.Llooprvv_vl_vects:
/* load src data */
slli a6, x_vec_i, 3
add a6,x_src,a6
ld x_ptr, 0(a6)
add x_ptr,x_ptr,x_pos
vle8.v v_src, (x_ptr) /* load from: src base + pos offset */
/* split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* gf_tbl addr: (x_tbl + dest_idx * x_vec * 32) + src_vec_idx * 32 */
/* load gf_table's */
vle8.v v_gft1_lo, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft1_hi, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft2_lo, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16
/* dest 1 */
/* table indexing, ie. gf(2^8) multiplication */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
/* exclusive or, ie. gf(2^8) add */
vxor.vv v_dest1, v_dest1, v26
vxor.vv v_dest1, v_dest1, v27
/* dest 2 */
vrgather.vv v26, v_gft2_lo, v_src_lo
vrgather.vv v27, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27
/* calc for next */
addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */
blt x_vec_i, x_vec, .Llooprvv_vl_vects
/* end of Loop 2 */
/* store dest data */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
add x_dest1,x_dest1,a5
add x_dest2,x_dest2,a5
/* increment one vector length */
add x_pos, x_pos, a5
j .Llooprvv_vl
/* end of Loop 1 */
.return_pass:
li a0, 0
ret
.return_fail:
li a0, 1
ret
#endif

View File

@@ -0,0 +1,148 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_2vect_mad_rvv
.type gf_2vect_mad_rvv, @function
/* gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
*/
/* arguments */
#define x_len a0
#define x_vec a1
#define x_vec_i a2
#define x_tbl a3
#define x_src a4
#define x_dest a5
/* returns */
#define w_ret a0
/* local variables */
#define x_pos t0
#define x_dest2 t1
#define x_dest1 t2
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v1
#define v_dest1 v3
#define v_tmp_lo v4
#define v_tmp_hi v5
#define v_gft1_lo v6
#define v_gft1_hi v7
#define v_gft2_lo v17
#define v_gft2_hi v18
#define v_dest2 v27
gf_2vect_mad_rvv:
/* less than 16 bytes, return_fail */
li t3, 16
blt x_len, t3, .return_fail
vsetvli t4, x0, e8, m1
/* load table 1 */
slli t3, x_vec_i, 5
add x_tbl, x_tbl, t3
vle8.v v_gft1_lo, (x_tbl)
addi t3, x_tbl, 16
vle8.v v_gft1_hi, (t3)
/* load table 2 */
slli t3, x_vec, 5
add x_tbl, x_tbl, t3
vle8.v v_gft2_lo, (x_tbl)
addi t3, x_tbl, 16
vle8.v v_gft2_hi, (t3)
/* load dest pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
li x_pos, 0
.Llooprvv_vl:
blt x_pos, x_len, .Lloop_body
j .return_pass
.Lloop_body:
/* load src data */
add t3, x_src, x_pos
vle8.v v_src, (t3)
/* split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load dest data */
add t3, x_dest1, x_pos
vle8.v v_dest1, (t3)
add t3, x_dest2, x_pos
vle8.v v_dest2, (t3)
/* dest1 */
/* table indexing, ie. gf(2^8) multiplication */
vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
/* exclusive or, ie. gf(2^8) add */
vxor.vv v_dest1, v_tmp_lo, v_dest1
vxor.vv v_dest1, v_tmp_hi, v_dest1
/* dest2 */
vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_tmp_lo, v_dest2
vxor.vv v_dest2, v_tmp_hi, v_dest2
/* store dest data */
add t3, x_dest1, x_pos
vse8.v v_dest1, (t3)
add t3, x_dest2, x_pos
vse8.v v_dest2, (t3)
/* increment one vector length */
add x_pos, x_pos, t4
j .Llooprvv_vl
.return_pass:
li w_ret, 0
ret
.return_fail:
li w_ret, 1
ret
#endif

View File

@@ -0,0 +1,188 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_3vect_dot_prod_rvv
.type gf_3vect_dot_prod_rvv, @function
/* void gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
/* arguments */
#define x_len a0 /* vector length */
#define x_vec a1 /* number of source vectors (ie. data blocks) */
#define x_tbl a2 /* gftbls */
#define x_src a3 /* src */
#define x_dest a4 /* dest */
/* local variables */
#define x_vec_i t1
#define x_ptr t2
#define x_pos t3
#define x_tbl1 t4
#define x_tbl2 t5
#define x_tbl3 t6
#define x_dest1 s0
#define x_dest2 s1
#define x_dest3 a5
#define t_offset a6
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_dest2 v5
#define v_dest3 v6
#define v_gft1_lo v8
#define v_gft1_hi v9
#define v_gft2_lo v10
#define v_gft2_hi v11
#define v_gft3_lo v12
#define v_gft3_hi v13
gf_3vect_dot_prod_rvv:
/* less than 16 bytes, return_fail */
li t0, 16
blt x_len, t0, .return_fail
/* save callee-saved registers */
addi sp, sp, -16
sd s0, 0(sp)
sd s1, 8(sp)
vsetvli a7, x0, e8, m1 /* Set vector length to maximum */
li x_pos, 0
slli t_offset, x_vec, 5
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
.Lloop_rvv_vl:
/* check if we have processed all elements */
bge x_pos, x_len, .return_pass
/* Clear destination vectors */
vmv.v.i v_dest1, 0
vmv.v.i v_dest2, 0
vmv.v.i v_dest3, 0
/* Reset table pointers */
mv x_tbl1, x_tbl
add x_tbl2, x_tbl1, t_offset
add x_tbl3, x_tbl2, t_offset
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0
.Lloop_rvv_vl_vects:
/* Load source data */
slli t0, x_vec_i, 3
add t0,x_src,t0
ld x_ptr, 0(t0)
add x_ptr,x_ptr,x_pos
vle8.v v_src, (x_ptr)
/* Split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* Load gf_table's */
vle8.v v_gft1_lo, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft1_hi, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft2_lo, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16
/* Load next gf_table's */
vle8.v v_gft3_lo, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft3_hi, (x_tbl3)
addi x_tbl3, x_tbl3, 16
/* dest 1 */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_dest1, v26
vxor.vv v_dest1, v_dest1, v27
/* dest 2 */
vrgather.vv v26, v_gft2_lo, v_src_lo
vrgather.vv v27, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27
/* dest 3 */
vrgather.vv v26, v_gft3_lo, v_src_lo
vrgather.vv v27, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_dest3, v26
vxor.vv v_dest3, v_dest3, v27
/* Move to next source vector */
addi x_vec_i, x_vec_i, 1
/* Check if we have processed all vectors */
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
/* Store destination data */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
add x_dest1,x_dest1, a7
add x_dest2,x_dest2, a7
add x_dest3,x_dest3, a7
add x_pos, x_pos, a7
j .Lloop_rvv_vl
.return_pass:
ld s0, 0(sp)
ld s1, 8(sp)
addi sp, sp, 16
li a0, 0
ret
.return_fail:
li a0, 1
ret
#endif

View File

@@ -0,0 +1,170 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_3vect_mad_rvv
.type gf_3vect_mad_rvv, @function
/* gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
*/
/* arguments */
#define x_len a0
#define x_vec a1
#define x_vec_i a2
#define x_tbl a3
#define x_src a4
#define x_dest a5
/* returns */
#define w_ret a0
/* local variables */
#define x_pos t0
#define x_dest1 t1
#define x_dest2 t2
#define x_dest3 t3
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_tmp_lo v5
#define v_tmp_hi v6
#define v_gft1_lo v7
#define v_gft1_hi v8
#define v_gft2_lo v9
#define v_gft2_hi v10
#define v_gft3_lo v11
#define v_gft3_hi v12
#define v_dest2 v19
#define v_dest3 v20
gf_3vect_mad_rvv:
/* less than 16 bytes, return_fail */
li t4, 16
blt x_len, t4, .return_fail
vsetvli t5, x0, e8, m1
/* Load table 1 */
slli t4, x_vec_i, 5
add x_tbl, x_tbl, t4
vle8.v v_gft1_lo, (x_tbl)
addi t4, x_tbl, 16
vle8.v v_gft1_hi, (t4)
/* Load table 2 */
slli t4, x_vec, 5
add x_tbl, x_tbl, t4
vle8.v v_gft2_lo, (x_tbl)
addi t4, x_tbl, 16
vle8.v v_gft2_hi, (t4)
/* Load table 3 */
slli t4, x_vec, 5
add x_tbl, x_tbl, t4
vle8.v v_gft3_lo, (x_tbl)
addi t4, x_tbl, 16
vle8.v v_gft3_hi, (t4)
/* Load destination pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
li x_pos, 0
.Llooprvv_vl:
blt x_pos, x_len, .Lloop_body
j .return_pass
.Lloop_body:
/* Load source data */
add t6, x_src, x_pos
vle8.v v_src, (t6)
/* Split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load dest data */
add t6, x_dest1, x_pos
vle8.v v_dest1, (t6)
add t6, x_dest2, x_pos
vle8.v v_dest2, (t6)
add t6, x_dest3, x_pos
vle8.v v_dest3, (t6)
/* dest1 */
vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_tmp_lo, v_dest1
vxor.vv v_dest1, v_tmp_hi, v_dest1
/* dest2 */
vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_tmp_lo, v_dest2
vxor.vv v_dest2, v_tmp_hi, v_dest2
/* dest3 */
vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_tmp_lo, v_dest3
vxor.vv v_dest3, v_tmp_hi, v_dest3
/* Store destination data */
add t6, x_dest1, x_pos
vse8.v v_dest1, (t6)
add t6, x_dest2, x_pos
vse8.v v_dest2, (t6)
add t6, x_dest3, x_pos
vse8.v v_dest3, (t6)
/* Increment position */
add x_pos, x_pos, t5
j .Llooprvv_vl
.return_pass:
li w_ret, 0
ret
.return_fail:
li w_ret, 1
ret
#endif

View File

@@ -0,0 +1,214 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_4vect_dot_prod_rvv
.type gf_4vect_dot_prod_rvv, @function
/* void gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
/* arguments */
#define x_len a0 /* vector length */
#define x_vec a1 /* number of source vectors (ie. data blocks) */
#define x_tbl a2
#define x_src a3
#define x_dest a4
/* local variables */
#define x_vec_i a7
#define x_ptr t1
#define x_pos t2
#define x_tbl1 t3
#define x_tbl2 t4
#define x_tbl3 t5
#define x_tbl4 t6
#define x_dest1 s0
#define x_dest2 s1
#define x_dest3 s2
#define x_dest4 s3
#define t_offset a5
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_dest2 v5
#define v_dest3 v6
#define v_dest4 v7
#define v_gft1_lo v8
#define v_gft1_hi v9
#define v_gft2_lo v10
#define v_gft2_hi v11
#define v_gft3_lo v12
#define v_gft3_hi v13
#define v_gft4_lo v14
#define v_gft4_hi v15
gf_4vect_dot_prod_rvv:
/* less than 16 bytes, return_fail */
li t0, 16
blt x_len, t0, .return_fail
/* save callee-saved registers */
addi sp, sp, -32
sd s0, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
li x_pos, 0
slli t_offset, x_vec, 5
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
/* Loop 1: x_len, vector length */
.Lloop_rvv_vl:
/* check if we have processed all elements */
bge x_pos, x_len, .return_pass
/* Clear destination vectors */
vmv.v.i v_dest1, 0
vmv.v.i v_dest2, 0
vmv.v.i v_dest3, 0
vmv.v.i v_dest4, 0
/* Reset table pointers */
mv x_tbl1, x_tbl
add x_tbl2, x_tbl1, t_offset
add x_tbl3, x_tbl2, t_offset
add x_tbl4, x_tbl3, t_offset
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0
.Lloop_rvv_vl_vects:
/* Load source data */
slli a6, x_vec_i, 3
add a6, x_src, a6
ld x_ptr, 0(a6)
add x_ptr, x_ptr, x_pos
vle8.v v_src, (x_ptr)
/* Split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* Load gf_table's */
vle8.v v_gft1_lo, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft1_hi, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft2_lo, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16
/* Load next gf_table's */
vle8.v v_gft3_lo, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft3_hi, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft4_lo, (x_tbl4)
addi x_tbl4, x_tbl4, 16
vle8.v v_gft4_hi, (x_tbl4)
addi x_tbl4, x_tbl4, 16
/* dest 1 */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_dest1, v26
vxor.vv v_dest1, v_dest1, v27
/* dest 2 */
vrgather.vv v26, v_gft2_lo, v_src_lo
vrgather.vv v27, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27
/* dest 3 */
vrgather.vv v26, v_gft3_lo, v_src_lo
vrgather.vv v27, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_dest3, v26
vxor.vv v_dest3, v_dest3, v27
/* dest 4 */
vrgather.vv v26, v_gft4_lo, v_src_lo
vrgather.vv v27, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_dest4, v26
vxor.vv v_dest4, v_dest4, v27
/* Move to next source vector */
addi x_vec_i, x_vec_i, 1
/* Check if we have processed all vectors */
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
/* Store destination data */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
vse8.v v_dest4, (x_dest4)
add x_dest1, x_dest1, t0
add x_dest2, x_dest2, t0
add x_dest3, x_dest3, t0
add x_dest4, x_dest4, t0
/* Increment position */
add x_pos, x_pos, t0
j .Lloop_rvv_vl
.return_pass:
/* restore callee-saved registers */
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
addi sp, sp, 32
li a0, 0
ret
.return_fail:
li a0, 1
ret
#endif

View File

@@ -0,0 +1,189 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_4vect_mad_rvv
.type gf_4vect_mad_rvv, @function
/* gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
*/
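/*
 * Multiply-and-add update: for each of the 4 outputs,
 *   dest[m][i] ^= gfmul(gftbls(m, vec_i), src[i])
 * The 32-byte table for output m and source vec_i sits at
 * gftbls + m * vec * 32 + vec_i * 32, which matches the vec_i*32 initial
 * offset and the vec*32 strides applied below.
 */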
/* arguments */
#define x_len a0
#define x_vec a1
#define x_vec_i a2
#define x_tbl a3
#define x_src a4
#define x_dest a5
/* returns */
#define w_ret a0
/* local variables */
#define x_pos t0
#define x_dest1 t1
#define x_dest2 t2
#define x_dest3 t3
#define x_dest4 t4
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_tmp_lo v5
#define v_tmp_hi v6
#define v_gft1_lo v7
#define v_gft1_hi v8
#define v_gft2_lo v9
#define v_gft2_hi v10
#define v_gft3_lo v11
#define v_gft3_hi v12
#define v_gft4_lo v13
#define v_gft4_hi v14
#define v_dest2 v15
#define v_dest3 v16
#define v_dest4 v17
gf_4vect_mad_rvv:
/* less than 16 bytes, return_fail */
li t5, 16
blt x_len, t5, .return_fail
vsetvli t6, x0, e8, m1
/* load table 1 */
slli t5, x_vec_i, 5
add x_tbl, x_tbl, t5
vle8.v v_gft1_lo, (x_tbl)
addi t5, x_tbl, 16
vle8.v v_gft1_hi, (t5)
/* load table 2 */
slli t5, x_vec, 5
add x_tbl, x_tbl, t5
vle8.v v_gft2_lo, (x_tbl)
addi t5, x_tbl, 16
vle8.v v_gft2_hi, (t5)
/* load table 3 */
slli t5, x_vec, 5
add x_tbl, x_tbl, t5
vle8.v v_gft3_lo, (x_tbl)
addi t5, x_tbl, 16
vle8.v v_gft3_hi, (t5)
/* load table 4 */
slli t5, x_vec, 5
add x_tbl, x_tbl, t5
vle8.v v_gft4_lo, (x_tbl)
addi t5, x_tbl, 16
vle8.v v_gft4_hi, (t5)
/* load dest pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
li x_pos, 0
.Llooprvv_vl:
blt x_pos, x_len, .Lloop_body
j .return_pass
.Lloop_body:
/* load src data */
add t5, x_src, x_pos
vle8.v v_src, (t5)
/* split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load dest data */
add t5, x_dest1, x_pos
vle8.v v_dest1, (t5)
add t5, x_dest2, x_pos
vle8.v v_dest2, (t5)
add t5, x_dest3, x_pos
vle8.v v_dest3, (t5)
add t5, x_dest4, x_pos
vle8.v v_dest4, (t5)
/* dest1 */
/* table indexing, ie. gf(2^8) multiplication */
vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
/* exclusive or, ie. gf(2^8) add */
vxor.vv v_dest1, v_tmp_lo, v_dest1
vxor.vv v_dest1, v_tmp_hi, v_dest1
/* dest2 */
vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_tmp_lo, v_dest2
vxor.vv v_dest2, v_tmp_hi, v_dest2
/* dest3 */
vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_tmp_lo, v_dest3
vxor.vv v_dest3, v_tmp_hi, v_dest3
/* dest4 */
vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_tmp_lo, v_dest4
vxor.vv v_dest4, v_tmp_hi, v_dest4
/* store dest data */
add t5, x_dest1, x_pos
vse8.v v_dest1, (t5)
add t5, x_dest2, x_pos
vse8.v v_dest2, (t5)
add t5, x_dest3, x_pos
vse8.v v_dest3, (t5)
add t5, x_dest4, x_pos
vse8.v v_dest4, (t5)
add x_pos, x_pos, t6
j .Llooprvv_vl
.return_pass:
li w_ret, 0
ret
.return_fail:
li w_ret, 1
ret
#endif

View File

@@ -0,0 +1,242 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_5vect_dot_prod_rvv
.type gf_5vect_dot_prod_rvv, @function
/* void gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
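/*
 * Same scheme as gf_4vect_dot_prod_rvv, extended to 5 outputs; the table
 * loads are interleaved with the gather/xor work inside the inner loop.
 */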
/* arguments */
#define x_len a0 /* vector length */
#define x_vec a1 /* number of source vectors (ie. data blocks) */
#define x_tbl a2
#define x_src a3
#define x_dest a4
/* local variables */
#define x_vec_i a7
#define x_ptr t1
#define x_pos t2
#define x_tbl1 t3
#define x_tbl2 t4
#define x_tbl3 t5
#define x_tbl4 t6
#define x_tbl5 s0
#define x_dest1 s1
#define x_dest2 s2
#define x_dest3 s3
#define x_dest4 s4
#define x_dest5 s5
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_gft1_lo v5
#define v_gft1_hi v6
#define v_gft2_lo v7
#define v_gft2_hi v8
#define v_gft3_lo v9
#define v_gft3_hi v10
#define v_gft4_lo v11
#define v_gft4_hi v12
#define v_gft5_lo v13
#define v_gft5_hi v14
#define v_dest2 v15
#define v_dest3 v16
#define v_dest4 v17
#define v_dest5 v18
gf_5vect_dot_prod_rvv:
/* less than 16 bytes, return_fail */
li t0, 16
blt x_len, t0, .return_fail
/* save callee-saved registers s0-s5 */
addi sp, sp, -48
sd s0, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
vsetvli a5, x0, e8, m1
/* Initialize position */
li x_pos, 0
/* Load destination pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
ld x_dest5, 32(x_dest)
/* Loop 1: x_len, vector length */
.Llooprvv_vl:
bge x_pos, x_len, .return_pass
/* Clear destination vectors */
vmv.v.i v_dest1, 0
vmv.v.i v_dest2, 0
vmv.v.i v_dest3, 0
vmv.v.i v_dest4, 0
vmv.v.i v_dest5, 0
/* Reset table pointers */
mv x_tbl1, x_tbl
slli t0, x_vec, 5
add x_tbl2, x_tbl1, t0
add x_tbl3, x_tbl2, t0
add x_tbl4, x_tbl3, t0
add x_tbl5, x_tbl4, t0
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0
.Llooprvv_vl_vects:
/* Load source data */
slli a6, x_vec_i, 3
add a6, x_src, a6
ld x_ptr, 0(a6)
add x_ptr, x_ptr, x_pos
vle8.v v_src, (x_ptr)
/* Split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* Load gf_table's */
vle8.v v_gft1_lo, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft1_hi, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft2_lo, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16
/* Move to next source vector */
addi x_vec_i, x_vec_i, 1
/* dest 1 */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_dest1, v26
vxor.vv v_dest1, v_dest1, v27
/* Load more gf_table's */
vle8.v v_gft3_lo, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft3_hi, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft4_lo, (x_tbl4)
addi x_tbl4, x_tbl4, 16
vle8.v v_gft4_hi, (x_tbl4)
addi x_tbl4, x_tbl4, 16
/* dest 2 */
vrgather.vv v26, v_gft2_lo, v_src_lo
vrgather.vv v27, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27
/* dest 3 */
vrgather.vv v26, v_gft3_lo, v_src_lo
vrgather.vv v27, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_dest3, v26
vxor.vv v_dest3, v_dest3, v27
/* Load more gf_table's */
vle8.v v_gft5_lo, (x_tbl5)
addi x_tbl5, x_tbl5, 16
vle8.v v_gft5_hi, (x_tbl5)
addi x_tbl5, x_tbl5, 16
/* dest 4 */
vrgather.vv v26, v_gft4_lo, v_src_lo
vrgather.vv v27, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_dest4, v26
vxor.vv v_dest4, v_dest4, v27
/* dest 5 */
vrgather.vv v26, v_gft5_lo, v_src_lo
vrgather.vv v27, v_gft5_hi, v_src_hi
vxor.vv v_dest5, v_dest5, v26
vxor.vv v_dest5, v_dest5, v27
/* Check if we have processed all vectors */
blt x_vec_i, x_vec, .Llooprvv_vl_vects
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
vse8.v v_dest4, (x_dest4)
vse8.v v_dest5, (x_dest5)
/* Advance destination pointers */
add x_dest1, x_dest1, a5
add x_dest2, x_dest2, a5
add x_dest3, x_dest3, a5
add x_dest4, x_dest4, a5
add x_dest5, x_dest5, a5
/* Increment position */
add x_pos, x_pos, a5
j .Llooprvv_vl
.return_pass:
/* Restore callee-saved registers */
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
addi sp, sp, 48
li a0, 0
ret
.return_fail:
li a0, 1
ret
#endif

View File

@@ -0,0 +1,214 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_5vect_mad_rvv
.type gf_5vect_mad_rvv, @function
/* gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
*/
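/*
 * Multiply-and-add for 5 outputs:
 *   dest[m][i] ^= gfmul(gftbls(m, vec_i), src[i]),  m = 0..4
 * using the same 32-byte low/high-nibble product tables as the
 * dot-product kernels.
 */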
/* arguments */
#define x_len a0
#define x_vec a1
#define x_vec_i a2
#define x_tbl a3
#define x_src a4
#define x_dest a5
/* returns */
#define w_ret a0
/* local variables */
#define x_pos t0
#define x_dest1 t1
#define x_dest2 t2
#define x_dest3 t3
#define x_dest4 t4
#define x_dest5 t5
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_tmp_lo v5
#define v_tmp_hi v6
#define v_gft1_lo v7
#define v_gft1_hi v8
#define v_gft2_lo v9
#define v_gft2_hi v10
#define v_gft3_lo v11
#define v_gft3_hi v12
#define v_gft4_lo v13
#define v_gft4_hi v14
#define v_gft5_lo v15
#define v_gft5_hi v16
#define v_dest2 v19
#define v_dest3 v20
#define v_dest4 v21
#define v_dest5 v22
gf_5vect_mad_rvv:
/* less than 16 bytes, return_fail */
li t6, 16
blt x_len, t6, .return_fail
vsetvli a7, x0, e8, m1
/* Load table 1 */
slli a6, x_vec_i, 5
add x_tbl, x_tbl, a6
vle8.v v_gft1_lo, (x_tbl)
addi a6, x_tbl, 16
vle8.v v_gft1_hi, (a6)
/* Load table 2 */
slli a6, x_vec, 5
add x_tbl, x_tbl, a6
vle8.v v_gft2_lo, (x_tbl)
addi a6, x_tbl, 16
vle8.v v_gft2_hi, (a6)
/* Load table 3 */
slli a6, x_vec, 5
add x_tbl, x_tbl, a6
vle8.v v_gft3_lo, (x_tbl)
addi a6, x_tbl, 16
vle8.v v_gft3_hi, (a6)
/* Load table 4 */
slli a6, x_vec, 5
add x_tbl, x_tbl, a6
vle8.v v_gft4_lo, (x_tbl)
addi a6, x_tbl, 16
vle8.v v_gft4_hi, (a6)
/* Load table 5 */
slli a6, x_vec, 5
add x_tbl, x_tbl, a6
vle8.v v_gft5_lo, (x_tbl)
addi a6, x_tbl, 16
vle8.v v_gft5_hi, (a6)
/* Load destination pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
ld x_dest5, 32(x_dest)
li x_pos, 0
.Llooprvv_vl:
blt x_pos, x_len, .Lloop_body
j .return_pass
.Lloop_body:
/* Load source data */
add t6, x_src, x_pos
vle8.v v_src, (t6)
/* Split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load dest data */
add t6, x_dest1, x_pos
vle8.v v_dest1, (t6)
add t6, x_dest2, x_pos
vle8.v v_dest2, (t6)
add t6, x_dest3, x_pos
vle8.v v_dest3, (t6)
add t6, x_dest4, x_pos
vle8.v v_dest4, (t6)
add t6, x_dest5, x_pos
vle8.v v_dest5, (t6)
/* dest1 */
vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_tmp_lo, v_dest1
vxor.vv v_dest1, v_tmp_hi, v_dest1
/* dest2 */
vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_tmp_lo, v_dest2
vxor.vv v_dest2, v_tmp_hi, v_dest2
/* dest3 */
vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_tmp_lo, v_dest3
vxor.vv v_dest3, v_tmp_hi, v_dest3
/* dest4 */
vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_tmp_lo, v_dest4
vxor.vv v_dest4, v_tmp_hi, v_dest4
/* dest5 */
vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi
vxor.vv v_dest5, v_tmp_lo, v_dest5
vxor.vv v_dest5, v_tmp_hi, v_dest5
/* Store destination data */
add t6, x_dest1, x_pos
vse8.v v_dest1, (t6)
add t6, x_dest2, x_pos
vse8.v v_dest2, (t6)
add t6, x_dest3, x_pos
vse8.v v_dest3, (t6)
add t6, x_dest4, x_pos
vse8.v v_dest4, (t6)
add t6, x_dest5, x_pos
vse8.v v_dest5, (t6)
/* Increment position */
add x_pos, x_pos, a7
j .Llooprvv_vl
.return_pass:
li w_ret, 0
ret
.return_fail:
li w_ret, 1
ret
#endif

View File

@@ -0,0 +1,273 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_6vect_dot_prod_rvv
.type gf_6vect_dot_prod_rvv, @function
/* void gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
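/*
 * 6-output variant of the RVV dot-product kernel:
 *   dest[m][i] = XOR over j of gfmul(gftbls(m,j), src[j][i]),  m = 0..5
 */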
/* arguments */
#define x_len a0 /* vector length */
#define x_vec a1 /* number of source vectors (ie. data blocks) */
#define x_tbl a2 /* gftbls */
#define x_src a3 /* src */
#define x_dest a4 /* dest */
/* local variables */
#define x_vec_i a7 /* loop counter for vectors */
#define x_ptr t1 /* pointer to current src */
#define x_pos t2 /* position in vector */
#define x_tbl1 t3 /* table pointer 1 */
#define x_tbl2 t4 /* table pointer 2 */
#define x_tbl3 t5 /* table pointer 3 */
#define x_tbl4 t6 /* table pointer 4 */
#define x_tbl5 s0 /* table pointer 5 */
#define x_tbl6 s1 /* table pointer 6 */
#define x_dest1 s2 /* dest pointer 1 */
#define x_dest2 s3 /* dest pointer 2 */
#define x_dest3 s4 /* dest pointer 3 */
#define x_dest4 s5 /* dest pointer 4 */
#define x_dest5 s6 /* dest pointer 5 */
#define x_dest6 s7 /* dest pointer 6 */
/* vector registers */
#define v_src v1 /* source vector */
#define v_src_lo v2 /* low 4 bits of source */
#define v_src_hi v3 /* high 4 bits of source */
#define v_dest1 v4 /* destination vector 1 */
#define v_dest2 v5 /* destination vector 2 */
#define v_dest3 v6 /* destination vector 3 */
#define v_dest4 v7 /* destination vector 4 */
#define v_dest5 v8 /* destination vector 5 */
#define v_dest6 v9 /* destination vector 6 */
#define v_gft1_lo v10 /* gf table 1 low */
#define v_gft1_hi v11 /* gf table 1 high */
#define v_gft2_lo v12 /* gf table 2 low */
#define v_gft2_hi v13 /* gf table 2 high */
#define v_gft3_lo v14 /* gf table 3 low */
#define v_gft3_hi v15 /* gf table 3 high */
#define v_gft4_lo v16 /* gf table 4 low */
#define v_gft4_hi v17 /* gf table 4 high */
#define v_gft5_lo v18 /* gf table 5 low */
#define v_gft5_hi v19 /* gf table 5 high */
#define v_gft6_lo v20 /* gf table 6 low */
#define v_gft6_hi v21 /* gf table 6 high */
gf_6vect_dot_prod_rvv:
/* less than 16 bytes, return_fail */
li t0, 16
blt x_len, t0, .return_fail
/* save callee-saved registers */
addi sp, sp, -64
sd s0, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
sd s6, 48(sp)
sd s7, 56(sp)
li t0, 0x0F
vsetvli a5, x0, e8, m1
/* initialize position */
li x_pos, 0
/* load destination pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
ld x_dest5, 32(x_dest)
ld x_dest6, 40(x_dest)
.Llooprvv_vl:
/* check if we have processed all elements */
bge x_pos, x_len, .return_pass
/* initialize vector loop counter */
li x_vec_i, 0
/* load source pointer */
ld x_ptr, 0(x_src)
/* clear destination vectors */
vmv.v.i v_dest1, 0
vmv.v.i v_dest2, 0
vmv.v.i v_dest3, 0
vmv.v.i v_dest4, 0
vmv.v.i v_dest5, 0
vmv.v.i v_dest6, 0
/* initialize table pointers */
/* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */
mv x_tbl1, x_tbl
slli t0, x_vec, 5
add x_tbl2, x_tbl1, t0
add x_tbl3, x_tbl2, t0
add x_tbl4, x_tbl3, t0
add x_tbl5, x_tbl4, t0
add x_tbl6, x_tbl5, t0
.Llooprvv_vl_vects:
/* load source data */
slli a6, x_vec_i, 3
add a6, x_src, a6
ld x_ptr, 0(a6)
add x_ptr, x_ptr, x_pos
vle8.v v_src, (x_ptr)
/* split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load gf_table's */
vle8.v v_gft1_lo, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft1_hi, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft2_lo, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft3_lo, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft3_hi, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft4_lo, (x_tbl4)
addi x_tbl4, x_tbl4, 16
vle8.v v_gft4_hi, (x_tbl4)
addi x_tbl4, x_tbl4, 16
vle8.v v_gft5_lo, (x_tbl5)
addi x_tbl5, x_tbl5, 16
vle8.v v_gft5_hi, (x_tbl5)
addi x_tbl5, x_tbl5, 16
vle8.v v_gft6_lo, (x_tbl6)
addi x_tbl6, x_tbl6, 16
vle8.v v_gft6_hi, (x_tbl6)
addi x_tbl6, x_tbl6, 16
/* dest 1 */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_dest1, v26
vxor.vv v_dest1, v_dest1, v27
/* dest 2 */
vrgather.vv v26, v_gft2_lo, v_src_lo
vrgather.vv v27, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27
/* GF multiplication and accumulation for dest3 */
vrgather.vv v26, v_gft3_lo, v_src_lo
vrgather.vv v27, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_dest3, v26
vxor.vv v_dest3, v_dest3, v27
/* GF multiplication and accumulation for dest4 */
vrgather.vv v26, v_gft4_lo, v_src_lo
vrgather.vv v27, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_dest4, v26
vxor.vv v_dest4, v_dest4, v27
/* GF multiplication and accumulation for dest5 */
vrgather.vv v26, v_gft5_lo, v_src_lo
vrgather.vv v27, v_gft5_hi, v_src_hi
vxor.vv v_dest5, v_dest5, v26
vxor.vv v_dest5, v_dest5, v27
/* GF multiplication and accumulation for dest6 */
vrgather.vv v26, v_gft6_lo, v_src_lo
vrgather.vv v27, v_gft6_hi, v_src_hi
vxor.vv v_dest6, v_dest6, v26
vxor.vv v_dest6, v_dest6, v27
/* load next source pointer */
addi x_vec_i, x_vec_i, 1
/* check if we have processed all vectors */
blt x_vec_i, x_vec, .Llooprvv_vl_vects
/* store destination data */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
vse8.v v_dest4, (x_dest4)
vse8.v v_dest5, (x_dest5)
vse8.v v_dest6, (x_dest6)
add x_dest1, x_dest1, a5
add x_dest2, x_dest2, a5
add x_dest3, x_dest3, a5
add x_dest4, x_dest4, a5
add x_dest5, x_dest5, a5
add x_dest6, x_dest6, a5
/* increment position */
add x_pos, x_pos, a5
j .Llooprvv_vl
.return_pass:
/* restore callee-saved registers */
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
ld s6, 48(sp)
ld s7, 56(sp)
addi sp, sp, 64
li a0, 0
ret
.return_fail:
li a0, 1
ret
#endif

View File

@@ -0,0 +1,241 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_6vect_mad_rvv
.type gf_6vect_mad_rvv, @function
/* gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
*/
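/*
 * Multiply-and-add for 6 outputs:
 *   dest[m][i] ^= gfmul(gftbls(m, vec_i), src[i]),  m = 0..5
 * s8 is used as a scratch register for the table offsets and is
 * saved/restored around the function.
 */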
/* arguments */
#define x_len a0
#define x_vec a1
#define x_vec_i a2
#define x_tbl a3
#define x_src a4
#define x_dest a5
/* returns */
#define w_ret a0
/* local variables */
#define x_pos t0
#define x_dest1 t1
#define x_dest2 t2
#define x_dest3 t3
#define x_dest4 t4
#define x_dest5 t5
#define x_dest6 t6
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_tmp_lo v5
#define v_tmp_hi v6
#define v_gft1_lo v7
#define v_gft1_hi v8
#define v_gft2_lo v9
#define v_gft2_hi v10
#define v_gft3_lo v11
#define v_gft3_hi v12
#define v_gft4_lo v13
#define v_gft4_hi v14
#define v_gft5_lo v15
#define v_gft5_hi v16
#define v_gft6_lo v17
#define v_gft6_hi v18
#define v_dest2 v19
#define v_dest3 v20
#define v_dest4 v21
#define v_dest5 v22
#define v_dest6 v23
gf_6vect_mad_rvv:
/* less than 16 bytes, return_fail */
li t6, 16
blt x_len, t6, .return_fail
/* save callee-saved registers */
addi sp, sp, -16
sd s8, 0(sp)
vsetvli a6, x0, e8, m1
/* Load table 1 */
slli s8, x_vec_i, 5
add x_tbl, x_tbl, s8
vle8.v v_gft1_lo, (x_tbl)
addi s8, x_tbl, 16
vle8.v v_gft1_hi, (s8)
/* Load table 2 */
slli s8, x_vec, 5
add x_tbl, x_tbl, s8
vle8.v v_gft2_lo, (x_tbl)
addi s8, x_tbl, 16
vle8.v v_gft2_hi, (s8)
/* Load table 3 */
slli s8, x_vec, 5
add x_tbl, x_tbl, s8
vle8.v v_gft3_lo, (x_tbl)
addi s8, x_tbl, 16
vle8.v v_gft3_hi, (s8)
/* Load table 4 */
slli s8, x_vec, 5
add x_tbl, x_tbl, s8
vle8.v v_gft4_lo, (x_tbl)
addi s8, x_tbl, 16
vle8.v v_gft4_hi, (s8)
/* Load table 5 */
slli s8, x_vec, 5
add x_tbl, x_tbl, s8
vle8.v v_gft5_lo, (x_tbl)
addi s8, x_tbl, 16
vle8.v v_gft5_hi, (s8)
/* Load table 6 */
slli s8, x_vec, 5
add x_tbl, x_tbl, s8
vle8.v v_gft6_lo, (x_tbl)
addi s8, x_tbl, 16
vle8.v v_gft6_hi, (s8)
/* Load destination pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
ld x_dest5, 32(x_dest)
ld x_dest6, 40(x_dest)
li x_pos, 0
.Llooprvv_vl:
blt x_pos, x_len, .Lloop_body
j .return_pass
.Lloop_body:
/* Load source data */
add a7, x_src, x_pos
vle8.v v_src, (a7)
/* Split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load dest data */
add a7, x_dest1, x_pos
vle8.v v_dest1, (a7)
add a7, x_dest2, x_pos
vle8.v v_dest2, (a7)
add a7, x_dest3, x_pos
vle8.v v_dest3, (a7)
add a7, x_dest4, x_pos
vle8.v v_dest4, (a7)
add a7, x_dest5, x_pos
vle8.v v_dest5, (a7)
add a7, x_dest6, x_pos
vle8.v v_dest6, (a7)
/* dest1 */
vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_tmp_lo, v_dest1
vxor.vv v_dest1, v_tmp_hi, v_dest1
/* dest2 */
vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_tmp_lo, v_dest2
vxor.vv v_dest2, v_tmp_hi, v_dest2
/* dest3 */
vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_tmp_lo, v_dest3
vxor.vv v_dest3, v_tmp_hi, v_dest3
/* dest4 */
vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_tmp_lo, v_dest4
vxor.vv v_dest4, v_tmp_hi, v_dest4
/* dest5 */
vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi
vxor.vv v_dest5, v_tmp_lo, v_dest5
vxor.vv v_dest5, v_tmp_hi, v_dest5
/* dest6 */
vrgather.vv v_tmp_lo, v_gft6_lo, v_src_lo
vrgather.vv v_tmp_hi, v_gft6_hi, v_src_hi
vxor.vv v_dest6, v_tmp_lo, v_dest6
vxor.vv v_dest6, v_tmp_hi, v_dest6
/* Store destination data */
add a7, x_dest1, x_pos
vse8.v v_dest1, (a7)
add a7, x_dest2, x_pos
vse8.v v_dest2, (a7)
add a7, x_dest3, x_pos
vse8.v v_dest3, (a7)
add a7, x_dest4, x_pos
vse8.v v_dest4, (a7)
add a7, x_dest5, x_pos
vse8.v v_dest5, (a7)
add a7, x_dest6, x_pos
vse8.v v_dest6, (a7)
/* Increment position */
add x_pos, x_pos, a6
j .Llooprvv_vl
.return_pass:
ld s8, 0(sp)
addi sp, sp, 16
li w_ret, 0
ret
.return_fail:
li w_ret, 1
ret
#endif

View File

@@ -0,0 +1,299 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_7vect_dot_prod_rvv
.type gf_7vect_dot_prod_rvv, @function
/* void gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
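/*
 * 7-output variant of the RVV dot-product kernel:
 *   dest[m][i] = XOR over j of gfmul(gftbls(m,j), src[j][i]),  m = 0..6
 * Seven destination pointers and seven table pointers require most of the
 * callee-saved registers, hence the larger stack frame below.
 */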
/* arguments */
#define x_len a0 /* vector length */
#define x_vec a1 /* number of source vectors (ie. data blocks) */
#define x_tbl a2
#define x_src a3
#define x_dest a4
/* local variables */
#define x_vec_i t1
#define x_ptr t2
#define x_pos t3
#define x_tbl1 t4
#define x_tbl2 t5
#define x_tbl3 t6
#define x_tbl4 s8
#define x_tbl5 a6
#define x_tbl6 a7
#define x_tbl7 s0
#define x_dest1 s1
#define x_dest2 s2
#define x_dest3 s3
#define x_dest4 s4
#define x_dest5 s5
#define x_dest6 s6
#define x_dest7 s7
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest1 v4
#define v_dest2 v5 /* destination 2 */
#define v_dest3 v6 /* destination 3 */
#define v_dest4 v7 /* destination 4 */
#define v_dest5 v8 /* destination 5 */
#define v_dest6 v9 /* destination 6 */
#define v_dest7 v10 /* destination 7 */
#define v_gft1_lo v11
#define v_gft1_hi v12
#define v_gft2_lo v13 /* GF table 2 low */
#define v_gft2_hi v14 /* GF table 2 high */
#define v_gft3_lo v15 /* GF table 3 low */
#define v_gft3_hi v16 /* GF table 3 high */
#define v_gft4_lo v17 /* GF table 4 low */
#define v_gft4_hi v18 /* GF table 4 high */
#define v_gft5_lo v19 /* GF table 5 low */
#define v_gft5_hi v20 /* GF table 5 high */
#define v_gft6_lo v21 /* GF table 6 low */
#define v_gft6_hi v22 /* GF table 6 high */
#define v_gft7_lo v23
#define v_gft7_hi v24
gf_7vect_dot_prod_rvv:
/* less than 16 bytes, return_fail */
li t0, 16
blt x_len, t0, .return_fail
/* save callee-saved registers */
addi sp, sp, -80
sd s0, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
sd s6, 48(sp)
sd s7, 56(sp)
sd s8, 64(sp)
vsetvli t0, x0, e8, m1
/* initialize position */
li x_pos, 0
/* load destination pointers */
ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
ld x_dest4, 24(x_dest)
ld x_dest5, 32(x_dest)
ld x_dest6, 40(x_dest)
ld x_dest7, 48(x_dest)
/* Loop 1: x_len, vector length */
.Llooprvv_vl:
/* check if we have processed all elements */
bge x_pos, x_len, .return_pass
/* initialize vector loop counter */
li x_vec_i, 0
/* load source pointer */
ld x_ptr, 0(x_src)
/* clear destination vectors */
vmv.v.i v_dest1, 0
vmv.v.i v_dest2, 0
vmv.v.i v_dest3, 0
vmv.v.i v_dest4, 0
vmv.v.i v_dest5, 0
vmv.v.i v_dest6, 0
vmv.v.i v_dest7, 0
/* reset table pointers */
mv x_tbl1, x_tbl
slli a5, x_vec, 5
add x_tbl2, x_tbl1, a5
add x_tbl3, x_tbl2, a5
add x_tbl4, x_tbl3, a5
add x_tbl5, x_tbl4, a5
add x_tbl6, x_tbl5, a5
add x_tbl7, x_tbl6, a5
.Llooprvv_vl_vects:
/* load source data */
slli a5, x_vec_i, 3
add a5, x_src, a5
ld x_ptr, 0(a5)
add x_ptr, x_ptr, x_pos
vle8.v v_src, (x_ptr)
/* split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load gf_table's */
vle8.v v_gft1_lo, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft1_hi, (x_tbl1)
addi x_tbl1, x_tbl1, 16
vle8.v v_gft2_lo, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16
vle8.v v_gft3_lo, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft3_hi, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft4_lo, (x_tbl4)
addi x_tbl4, x_tbl4, 16
vle8.v v_gft4_hi, (x_tbl4)
addi x_tbl4, x_tbl4, 16
vle8.v v_gft5_lo, (x_tbl5)
addi x_tbl5, x_tbl5, 16
vle8.v v_gft5_hi, (x_tbl5)
addi x_tbl5, x_tbl5, 16
vle8.v v_gft6_lo, (x_tbl6)
addi x_tbl6, x_tbl6, 16
vle8.v v_gft6_hi, (x_tbl6)
addi x_tbl6, x_tbl6, 16
vle8.v v_gft7_lo, (x_tbl7)
addi x_tbl7, x_tbl7, 16
vle8.v v_gft7_hi, (x_tbl7)
addi x_tbl7, x_tbl7, 16
/* dest 1 */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_dest1, v26
vxor.vv v_dest1, v_dest1, v27
/* dest 2 */
vrgather.vv v26, v_gft2_lo, v_src_lo
vrgather.vv v27, v_gft2_hi, v_src_hi
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27
/* GF multiplication and accumulation for dest3 */
vrgather.vv v26, v_gft3_lo, v_src_lo
vrgather.vv v27, v_gft3_hi, v_src_hi
vxor.vv v_dest3, v_dest3, v26
vxor.vv v_dest3, v_dest3, v27
/* GF multiplication and accumulation for dest4 */
vrgather.vv v26, v_gft4_lo, v_src_lo
vrgather.vv v27, v_gft4_hi, v_src_hi
vxor.vv v_dest4, v_dest4, v26
vxor.vv v_dest4, v_dest4, v27
/* GF multiplication and accumulation for dest5 */
vrgather.vv v26, v_gft5_lo, v_src_lo
vrgather.vv v27, v_gft5_hi, v_src_hi
vxor.vv v_dest5, v_dest5, v26
vxor.vv v_dest5, v_dest5, v27
/* GF multiplication and accumulation for dest6 */
vrgather.vv v26, v_gft6_lo, v_src_lo
vrgather.vv v27, v_gft6_hi, v_src_hi
vxor.vv v_dest6, v_dest6, v26
vxor.vv v_dest6, v_dest6, v27
/* GF multiplication and accumulation for dest7 */
vrgather.vv v26, v_gft7_lo, v_src_lo
vrgather.vv v27, v_gft7_hi, v_src_hi
vxor.vv v_dest7, v_dest7, v26
vxor.vv v_dest7, v_dest7, v27
/* increment x_vec_i */
addi x_vec_i, x_vec_i, 1
blt x_vec_i, x_vec, .Llooprvv_vl_vects
/* Store results to destination */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
vse8.v v_dest4, (x_dest4)
vse8.v v_dest5, (x_dest5)
vse8.v v_dest6, (x_dest6)
vse8.v v_dest7, (x_dest7)
add x_dest1, x_dest1, t0
add x_dest2, x_dest2, t0
add x_dest3, x_dest3, t0
add x_dest4, x_dest4, t0
add x_dest5, x_dest5, t0
add x_dest6, x_dest6, t0
add x_dest7, x_dest7, t0
/* increment one vector length */
add x_pos, x_pos, t0
j .Llooprvv_vl
.return_pass:
/* Restore callee-saved registers */
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
ld s6, 48(sp)
ld s7, 56(sp)
ld s8, 64(sp)
addi sp, sp, 80
/* Return success */
li a0, 0
ret
.return_fail:
li a0, 1 # return fail
ret
#endif

View File

@@ -0,0 +1,136 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
# RISC-V RVV implementation of gf_vect_dot_prod_rvv
# Function: gf_vect_dot_prod_rvv
# Arguments:
# a0: len (vector length)
# a1: vlen (number of source vectors)
# a2: gftbls (pointer to GF(2^8) multiplication tables)
# a3: src (pointer to array of source vector pointers)
# a4: dest (pointer to destination vector)
# Local variables:
# t0: vec_i (byte offset into the src pointer array)
# t1: ptr (pointer to current source vector)
# t2: pos (current position in vector)
# t3: tbl1 (pointer to current GF table)
# Vector registers:
# v1: z_src (source vector data)
# v2: z_src_lo (low 4 bits of source vector)
# v3: z_src_hi (high 4 bits of source vector)
# v4: z_dest (destination accumulator)
# v5: z_gft1_lo (16-byte GF table for the low nibbles)
# v6: z_gft1_hi (16-byte GF table for the high nibbles)
# v8/v9: gathered partial products
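#
# Reference computation (scalar sketch of what the vector loop below does):
#   for (pos = 0; pos < len; pos++) {
#       unsigned char d = 0;
#       for (j = 0; j < vlen; j++) {
#           unsigned char b = src[j][pos];
#           d ^= gftbls[j*32 + (b & 0x0F)] ^ gftbls[j*32 + 16 + (b >> 4)];
#       }
#       dest[pos] = d;
#   }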
#if HAVE_RVV
.text
.align 2
.global gf_vect_dot_prod_rvv
.type gf_vect_dot_prod_rvv, @function
gf_vect_dot_prod_rvv:
# Check if len < 16
li t4, 16
blt a0, t4, .return_fail
vsetvli t5, zero, e8, m1 # Set vector length to maximum
# Initialize pos = 0
li t2, 0
# Multiply vlen by 8 (each pointer is 8 bytes)
slli a1, a1, 3
.Llooprvv_vl:
# Check if pos >= len
bge t2, a0, .return_pass
# Clear z_dest
vmv.v.i v4, 0
# Initialize vec_i = 0
li t0, 0
# Reset tbl1 to gftbls
mv t3, a2
.Llooprvv_vl_vects:
# Load src[vec_i] into ptr
add t6, a3, t0 # src + vec_i * 8
ld t1, 0(t6) # Load pointer to current source vector
# Load src data into z_src
add t1, t1, t2 # add offset
vle8.v v1, (t1) # Load source vector into v1
# Advance to the next source pointer (8-byte entries)
addi t0, t0, 8
# Load GF table (low and high)
vle8.v v5, (t3) # Load low 8 bits of GF table
addi t3, t3, 16 # Move to next GF table entry
vle8.v v6, (t3) # Load high 8 bits of GF table
addi t3, t3, 16 # Move to next GF table entry
# Split src into low and high 4 bits
vand.vi v2, v1, 0x0F # z_src_lo = z_src & z_mask0f
vsrl.vi v3, v1, 4 # z_src_hi = z_src >> 4
# GF multiplication (table lookup)
vrgather.vv v8, v5, v2 # z_gft1_lo = GF table lookup for low 4 bits
vrgather.vv v9, v6, v3 # z_gft1_hi = GF table lookup for high 4 bits
# GF addition (XOR)
vxor.vv v4, v4, v8 # z_dest ^= z_gft1_lo
vxor.vv v4, v4, v9 # z_dest ^= z_gft1_hi
# Check if vec_i < vlen
blt t0, a1, .Llooprvv_vl_vects
# Store z_dest to dest[pos]
vse8.v v4, (a4) # Store destination vector
add a4, a4, t5 # Move dest pointer to next position
# Increment pos
add t2, t2, t5 # pos += vl (vector length)
j .Llooprvv_vl
.return_pass:
li a0, 0 # Return 0 (success)
ret
.return_fail:
li a0, 1 # Return 1 (failure)
ret
#endif

View File

@@ -0,0 +1,119 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_vect_mad_rvv
.type gf_vect_mad_rvv, @function
/* gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
*/
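/*
 * Single-output multiply-and-add: dest[i] ^= gfmul(c, src[i]), where c is
 * the constant selected by vec_i and its 32-byte low/high-nibble product
 * table sits at gftbls + vec_i * 32.
 */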
/* arguments */
#define x_len a0
#define x_vec_i a2
#define x_tbl a3
#define x_src a4
#define x_dest a5
/* returns */
#define w_ret a0
/* local variables */
#define x_pos t0
/* vectors */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest v4
#define v_tmp1_lo v5
#define v_tmp1_hi v6
#define v_gft1_lo v7
#define v_gft1_hi v8
gf_vect_mad_rvv:
/* less than 16 bytes, return_fail */
li t1, 16
blt x_len, t1, .return_fail
vsetvli t2, x0, e8, m1
/* x_tbl += x_vec_i * 2^5 */
slli t1, x_vec_i, 5
add x_tbl, x_tbl, t1
/* Load gft1_lo and gft1_hi */
vle8.v v_gft1_lo, (x_tbl)
addi t1, x_tbl, 16
vle8.v v_gft1_hi, (t1)
li x_pos, 0
.Lloop_rvv_vl:
/* load src data */
vle8.v v_src, (x_src)
/* split 4-bit lo; 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* load dest data */
vle8.v v_dest, (x_dest)
/* table indexing, ie. gf(2^8) multiplication */
/* RISC-V RVV does not have tbl instruction, use vrgather.vv */
vrgather.vv v_tmp1_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp1_hi, v_gft1_hi, v_src_hi
/* exclusive or, ie. gf(2^8) add */
vxor.vv v_dest, v_tmp1_lo, v_dest
vxor.vv v_dest, v_tmp1_hi, v_dest
/* store dest data */
vse8.v v_dest, (x_dest)
/* increment one vector length */
add x_pos, x_pos, t2
add x_src, x_src, t2
add x_dest, x_dest, t2
blt x_pos, x_len, .Lloop_rvv_vl
.return_pass:
li w_ret, 0
ret
.return_fail:
li w_ret, 1
ret
#endif

View File

@@ -0,0 +1,114 @@
##################################################################
# Copyright (c) 2025 sanechips Technologies Co., Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of sanechips Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
#if HAVE_RVV
.text
.align 2
.global gf_vect_mul_rvv
.type gf_vect_mul_rvv, @function
/* Function arguments:
* a0: len - Length of vector in bytes.
* a1: gftbl - Pointer to 32-byte array of pre-calculated constants.
* a2: src - Pointer to source data array.
* a3: dest - Pointer to destination data array.
* Returns:
* a0: 0 for success, 1 for failure.
*/
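/*
 * gftbl layout (32 bytes, typically built by gf_vect_mul_init()):
 *   bytes  0..15 : c * i         for i = 0..15  (low-nibble products)
 *   bytes 16..31 : c * (i << 4)  for i = 0..15  (high-nibble products)
 * so dest[i] = gftbl[src[i] & 0x0F] ^ gftbl[16 + (src[i] >> 4)].
 */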
/* Local variables */
#define x_pos t0
#define x_tmp t1
#define x_ptr t2
#define x_len a0
#define x_tbl a1
#define x_src a2
#define x_dest a3
/* Vector registers */
#define v_src v1
#define v_src_lo v2
#define v_src_hi v3
#define v_dest v4
#define v_tmp1_lo v5
#define v_tmp1_hi v6
#define v_gft1_lo v7
#define v_gft1_hi v8
gf_vect_mul_rvv:
/* Fail unless len is a multiple of 32 bytes */
andi x_tmp, x_len, 0x1F
bnez x_tmp, .return_fail
vsetvli t6, x0, e8, m1
/* Load pre-calculated constants into v_gft1_lo and v_gft1_hi */
vle8.v v_gft1_lo, (x_tbl)
addi t3, x_tbl, 16
vle8.v v_gft1_hi, (t3)
/* Initialize position counter */
li x_pos, 0
.Llooprvv_vl:
/* Load source data into v_src */
add x_ptr, x_src, x_pos
vle8.v v_src, (x_ptr)
/* Split 4-bit lo and 4-bit hi */
vand.vi v_src_lo, v_src, 0x0F
vsrl.vi v_src_hi, v_src, 4
/* Table lookup (GF multiplication) */
vrgather.vv v_tmp1_lo, v_gft1_lo, v_src_lo
vrgather.vv v_tmp1_hi, v_gft1_hi, v_src_hi
/* XOR (GF addition) */
vxor.vv v_dest, v_tmp1_hi, v_tmp1_lo
/* Store result to destination */
vse8.v v_dest, (x_dest)
/* Increment position counter */
add x_pos, x_pos, t6
add x_dest, x_dest, t6
/* Check if we have processed all bytes */
blt x_pos, x_len, .Llooprvv_vl
.return_pass:
li a0, 0
ret
.return_fail:
li a0, 1
ret
#endif