diff --git a/Release_notes.txt b/Release_notes.txt
index 3400b5e..60cf4f5 100644
--- a/Release_notes.txt
+++ b/Release_notes.txt
@@ -152,7 +152,7 @@ v2.32
   - Added new RVV xor_gen, pq_gen implementations.
 * Erasure coding improvements:
-  - Added new RVV ec_encode_data, gf_vect_dot_prod, gf_vect_mul implementations.
+  - Added new RVV ec_encode_data, ec_encode_data_update, gf_vect_mad, gf_vect_dot_prod, gf_vect_mul implementations.
 * Zero-memory detection improvements:
   - Added new RVV implementations.
diff --git a/configure.ac b/configure.ac
index 545f614..47f3427 100644
--- a/configure.ac
+++ b/configure.ac
@@ -67,8 +67,13 @@ case "${CPU}" in
                ])],
                [AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions])
                 AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes],
-               [AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
+               [AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
+                AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
        )
+       if test "x$rvv" = "xyes"; then
+               CFLAGS+=" -march=rv64gcv"
+               CCASFLAGS+=" -march=rv64gcv"
+       fi
        AC_MSG_RESULT([$rvv])
        ;;
diff --git a/erasure_code/erasure_code_test.c b/erasure_code/erasure_code_test.c
index 365f0e1..0058cdd 100644
--- a/erasure_code/erasure_code_test.c
+++ b/erasure_code/erasure_code_test.c
@@ -50,6 +50,9 @@
 #define EFENCE_TEST_MIN_SIZE 16
 #define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100
+#if HAVE_RVV
+#define EC_ALIGNED_ADDR
+#endif
 #ifdef EC_ALIGNED_ADDR
 // Define power of 2 range to check ptr, len alignment
 #define PTR_ALIGN_CHK_B 0
diff --git a/erasure_code/erasure_code_update_test.c b/erasure_code/erasure_code_update_test.c
index 13be40b..f5710d2 100644
--- a/erasure_code/erasure_code_update_test.c
+++ b/erasure_code/erasure_code_update_test.c
@@ -35,8 +35,13 @@
 #include "test.h"
 #ifndef ALIGN_SIZE
+#if HAVE_RVV
+#define EC_ALIGNED_ADDR
+#define ALIGN_SIZE 32
+#else
 #define ALIGN_SIZE 16
 #endif
+#endif
 // By default, test multibinary version
 #ifndef FUNCTION_UNDER_TEST
diff --git a/erasure_code/riscv64/Makefile.am b/erasure_code/riscv64/Makefile.am
index 29e0d01..de03ccb 100644
--- a/erasure_code/riscv64/Makefile.am
+++ b/erasure_code/riscv64/Makefile.am
@@ -1,3 +1,31 @@
+#########################################################################
+# Copyright (c) 2025 sanechips Technologies Co., Ltd.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in
+#     the documentation and/or other materials provided with the
+#     distribution.
+#   * Neither the name of sanechips Corporation nor the names of its
+#     contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## ######################################################################## # Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). # @@ -28,8 +56,20 @@ ######################################################################## lsrc_riscv64 += \ - erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c \ - erasure_code/riscv64/ec_multibinary_riscv64.S \ - erasure_code/riscv64/ec_gf_vect_mul_rvv.S \ - erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S \ - erasure_code/riscv64/ec_encode_data_rvv.S + erasure_code/riscv64/ec_riscv64_dispatcher.c \ + erasure_code/riscv64/ec_multibinary_riscv64.S \ + erasure_code/riscv64/ec_riscv64_highlevel_func.c \ + erasure_code/riscv64/gf_vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_2vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_3vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_4vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_5vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_6vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_7vect_dot_prod_rvv.S \ + erasure_code/riscv64/gf_vect_mad_rvv.S \ + erasure_code/riscv64/gf_2vect_mad_rvv.S \ + erasure_code/riscv64/gf_3vect_mad_rvv.S \ + erasure_code/riscv64/gf_4vect_mad_rvv.S \ + erasure_code/riscv64/gf_5vect_mad_rvv.S \ + erasure_code/riscv64/gf_6vect_mad_rvv.S \ + erasure_code/riscv64/gf_vect_mul_rvv.S diff --git a/erasure_code/riscv64/ec_encode_data_rvv.S b/erasure_code/riscv64/ec_encode_data_rvv.S deleted file mode 100644 index b568cbc..0000000 --- a/erasure_code/riscv64/ec_encode_data_rvv.S +++ /dev/null @@ -1,154 +0,0 @@ -/********************************************************************** - Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of ISCAS nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ -#if HAVE_RVV -#include "ec_table.S" -.option arch, +v -.global ec_encode_data_rvv -.type ec_encode_data_rvv, %function -ec_encode_data_rvv: - blez a2, 3f - blez a0, 3f - - lla t0, gff_base - lla t1, gflog_base - addi a3, a3, 1 - vsetvli zero, a1, e8, mf2, ta, ma - vmv.v.i v20, 0 - li t3, 32 - mv a6, a0 // backup len - mv a7, a5 // backup dest - - csrr t5, vlenb // vlen/8 - srli t5, t5, 1 // mf2: vlen/16 - blt t5, a1, slow // vlen/16(hardware) < vlen(software) - -2: - li t2, 0 // l - vlse8.v v24, (a3), t3 // v[j*32+1] - vmsne.vi v12, v24, 0 // if v == 0 - vluxei8.v v24, (t1), v24 // gflag_base[v[]] - ld a5, (a5) - -1: - vsetvli zero, zero, e8, mf2, ta, ma - vle64.v v16, (a4) // src[j] - vluxei64.v v16, (t2), v16 // src[j][i] - vmsne.vi v0, v16, 0 // if src == 0 - vmand.mm v0, v0, v12 // if src == 0 || v == 0 - vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]] - vwaddu.vv v8, v16, v24, v0.t - vmv.v.i v16, 0 - vsetvli zero, zero, e8, mf2, ta, mu - vluxei16.v v16, (t0), v8, v0.t // gff_base[i] - vxor.vv v20, v16, v20 - vmv.s.x v8, zero - vredxor.vs v8, v20, v8 - vmv.x.s t5, v8 - addi a0, a0, -1 // len - sb t5, (a5) // dest[0][i] - addi t2, t2, 1 // src[j][i] - vmv.v.i v20, 0 - addi a5, a5, 1 // dest[i] - bnez a0, 1b - - addi a2, a2, -1 // l(dests) - addi a7, a7, 8 - mv a0, a6 // restore len - mv a5, a7 // update unsigned char **dest - slli t5, a1, 5 // += vlen * 32 - add a3, a3, t5 - bnez a2, 2b - - ret - -slow: - addi sp, sp, -16 - sd s2, 0(sp) - sd s3, 8(sp) - mv s3, a4 // src - mv s2, a3 // v - mv t4, a1 // backup vlen - -2: - li t2, 0 // i < len - ld a5, (a5) - -1: - vsetvli t6, a1, e8, mf2, ta, ma - vle64.v v16, (a4) // src[j] - vluxei64.v v16, (t2), v16 // src[j][i] - vlse8.v v24, (a3), t3 // v[j*32+1] - vmsne.vi v12, v24, 0 // if v == 0 - vmsne.vi v0, v16, 0 // if src == 0 - vmand.mm v0, v0, v12 // if src == 0 || v == 0 - vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]] - vluxei8.v v24, (t1), v24 // gflag_base[v[]] - vwaddu.vv v8, v16, v24, v0.t - vmv.v.i v16, 0 - vsetvli zero, zero, e8, mf2, ta, mu - vluxei16.v v16, (t0), v8, v0.t // gff_base[i] - vxor.vv v20, v16, v20 - sub a1, a1, t6 - slli t5, t6, 5 - add a3, a3, t5 // v += 32 * vlen - slli t5, t6, 3 - add a4, a4, t5 // src += 8 * vlen - bnez a1, 1b // for (j = 0; j < vlen; j++) - - vsetvli zero, t4, e8, mf2, ta, ma - vmv.s.x v8, zero - vredxor.vs v8, v20, v8 - vmv.x.s t5, v8 - addi a0, a0, -1 // len - sb t5, (a5) // dest[0][i] - addi t2, t2, 1 // src[j][i] - vmv.v.i v20, 0 - mv a1, t4 // restore vlen - mv a3, s2 // restore v - mv a4, s3 // restore src - addi a5, a5, 1 // dest[i] - bnez a0, 1b // for (i = 0; i < len; i++) - - addi a2, a2, -1 // l(dests) - addi a7, a7, 8 // for (l = 0; l < dests; l++) - mv a0, a6 // restore len - mv a5, a7 - slli t5, t4, 5 - add a3, a3, t5 // v += vlen * 32 - mv s2, a3 - bnez a2, 2b // for (l = 0; l < dests; l++) { - - ld s2, 0(sp) - ld s3, 8(sp) - addi sp, sp, 16 - -3: - ret - -#endif diff --git 
a/erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S b/erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S deleted file mode 100644 index af6f944..0000000 --- a/erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S +++ /dev/null @@ -1,120 +0,0 @@ -/********************************************************************** - Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of ISCAS nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ -#if HAVE_RVV -#include "ec_table.S" -.option arch, +v -.global gf_vect_dot_prod_rvv -.type gf_vect_dot_prod_rvv, %function -gf_vect_dot_prod_rvv: - blez a0, 3f - - lla t0, gff_base - lla t1, gflog_base - addi a2, a2, 1 - vsetvli zero, a1, e8, mf2, ta, ma - vmv.v.i v20, 0 - li t2, 0 - li t3, 32 - csrr t5, vlenb // vlen/8 - srli t5, t5, 1 // mf2: vlen/16 - blt t5, a1, slow // vlen/16(hardware) < vlen(software) - - vlse8.v v24, (a2), t3 // v[j*32+1] - vmsne.vi v12, v24, 0 // if v == 0 - vluxei8.v v24, (t1), v24 // gflag_base[v[]] - -1: - vsetvli zero, zero, e8, mf2, ta, ma - vle64.v v16, (a3) // src[j] - vluxei64.v v16, (t2), v16 // src[j][i] - vmsne.vi v0, v16, 0 // if src == 0 - vmand.mm v0, v0, v12 // if src == 0 || v == 0 - vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]] - vwaddu.vv v8, v16, v24, v0.t - vmv.v.i v16, 0 - vsetvli zero, zero, e8, mf2, ta, mu - vluxei16.v v16, (t0), v8, v0.t // gff_base[i] - vxor.vv v20, v16, v20 - - vmv.s.x v8, zero - vredxor.vs v8, v20, v8 - vmv.x.s t5, v8 - addi a0, a0, -1 // len - sb t5, (a4) - addi t2, t2, 1 // src[j][i] - vmv.v.i v20, 0 - addi a4, a4, 1 // dest[i] - bnez a0, 1b - ret - -slow: - mv a7, a3 // src - mv a6, a2 // v - mv t4, a1 // vlen - -1: - vsetvli t6, a1, e8, mf2, ta, ma - vle64.v v16, (a3) - vluxei64.v v16, (t2), v16 // src[j][i] - vlse8.v v24, (a2), t3 // v[j*32+1] - vmsne.vi v0, v16, 0 // if src == 0 - vmsne.vi v12, v24, 0 // if v == 0 - vmand.mm v0, v0, v12 - vluxei8.v v16, (t1), v16, v0.t // gflag_base[src[j][i]] - vluxei8.v v24, (t1), v24, v0.t // gflag_base[v[]] - vwaddu.vv v8, v16, v24, v0.t - vmv.v.i v16, 0 - vsetvli zero, zero, e8, mf2, ta, mu - vluxei16.v v16, (t0), v8, v0.t // gff_base[i] - vxor.vv v20, v16, v20 - slli t5, t6, 5 - add a2, a2, t5 // v += 32 * vlen - slli t5, t6, 3 - add a3, a3, t5 // src += 8 * vlen - sub a1, a1, t6 // vlen - bnez a1, 1b // for (j = 0; j < vlen; j++) - - vsetvli zero, t4, e8, mf2, ta, mu - vmv.s.x v8, zero - vredxor.vs v8, v20, v8 - vmv.x.s t5, v8 - addi a0, a0, -1 // len - mv a3, a7 // src - mv a2, a6 // v - mv a1, t4 // vlen - addi t2, t2, 1 // i - sb t5, (a4) - vmv.v.i v20, 0 - addi a4, a4, 1 // dest[i] - bnez a0, 1b // for (i = 0; i < len; i++) { - -3: - ret - -#endif diff --git a/erasure_code/riscv64/ec_gf_vect_mul_rvv.S b/erasure_code/riscv64/ec_gf_vect_mul_rvv.S deleted file mode 100644 index 60bdeec..0000000 --- a/erasure_code/riscv64/ec_gf_vect_mul_rvv.S +++ /dev/null @@ -1,76 +0,0 @@ -/********************************************************************** - Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of ISCAS nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ -#if HAVE_RVV -#include "ec_table.S" -.option arch, +v -.global gf_vect_mul_rvv -.type gf_vect_mul_rvv, %function -gf_vect_mul_rvv: - li t4, 32 - rem t4, a0, t4 - bnez t4, ret1 // (len % 32) != 0 - - lla t0, gff_base - lla t1, gflog_base - lbu t2, 1(a1) // unsigned char c = a[1]; - beqz t2, 2f - add t2, t1, t2 // &gflog_base[c] - lbu t2, (t2) // gflog_base[c] - -1: - vsetvli t6, a0, e8, m4, ta, ma - vle8.v v16, (a2) // src - vmsne.vi v0, v16, 0 // if b == 0 - vluxei8.v v16, (t1), v16, v0.t // gflag_base[b] - vwaddu.vx v8, v16, t2, v0.t - vmv.v.i v16, 0 - vluxei16.v v16, (t0), v8, v0.t // gff_base[i] - vse8.v v16, (a3) - add a2, a2, t6 - add a3, a3, t6 - sub a0, a0, t6 - bnez a0, 1b - ret - -2: - vsetvli t6, a0, e8, m8, ta, ma - vmv.v.i v0, 0 -3: - vsetvli t6, a0, e8, m8, ta, ma - vse8.v v0, (a3) - add a3, a3, t6 - sub a0, a0, t6 - bnez a0, 3b - ret - -ret1: - li a0, -1 - ret - -#endif diff --git a/erasure_code/riscv64/ec_multibinary_riscv64.S b/erasure_code/riscv64/ec_multibinary_riscv64.S index 80cf744..3657966 100644 --- a/erasure_code/riscv64/ec_multibinary_riscv64.S +++ b/erasure_code/riscv64/ec_multibinary_riscv64.S @@ -1,3 +1,31 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## /********************************************************************** Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). @@ -29,16 +57,9 @@ #include "riscv64_multibinary.h" -#if HAVE_RVV - mbin_interface gf_vect_mul - mbin_interface gf_vect_dot_prod - mbin_interface ec_encode_data -#else - mbin_interface_base gf_vect_mul gf_vect_mul_base - mbin_interface_base gf_vect_dot_prod gf_vect_dot_prod_base - mbin_interface_base ec_encode_data ec_encode_data_base -#endif - -mbin_interface_base ec_init_tables ec_init_tables_base -mbin_interface_base ec_encode_data_update ec_encode_data_update_base -mbin_interface_base gf_vect_mad gf_vect_mad_base +mbin_interface ec_encode_data +mbin_interface gf_vect_mul +mbin_interface gf_vect_dot_prod +mbin_interface gf_vect_mad +mbin_interface ec_encode_data_update +mbin_interface ec_init_tables diff --git a/erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c b/erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c deleted file mode 100644 index 8759d72..0000000 --- a/erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c +++ /dev/null @@ -1,78 +0,0 @@ -/********************************************************************** - Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of ISCAS nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ -#include "riscv64_multibinary.h" - -extern int -gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest); -extern int -gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest); -extern void -gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest); -extern void -gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, - unsigned char *dest); -extern void -ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v, unsigned char **src, - unsigned char **dest); -extern void -ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, - unsigned char **dest); - -DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) -{ -#if HAVE_RVV - const unsigned long hwcap = getauxval(AT_HWCAP); - if (hwcap & HWCAP_RV('V')) - return gf_vect_mul_rvv; - else -#endif - return gf_vect_mul_base; -} - -DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) -{ -#if HAVE_RVV - const unsigned long hwcap = getauxval(AT_HWCAP); - if (hwcap & HWCAP_RV('V')) - return gf_vect_dot_prod_rvv; - else -#endif - return gf_vect_dot_prod_base; -} - -DEFINE_INTERFACE_DISPATCHER(ec_encode_data) -{ -#if HAVE_RVV - const unsigned long hwcap = getauxval(AT_HWCAP); - if (hwcap & HWCAP_RV('V')) - return ec_encode_data_rvv; - else -#endif - return ec_encode_data_base; -} diff --git a/erasure_code/riscv64/ec_riscv64_dispatcher.c b/erasure_code/riscv64/ec_riscv64_dispatcher.c new file mode 100644 index 0000000..1416966 --- /dev/null +++ b/erasure_code/riscv64/ec_riscv64_dispatcher.c @@ -0,0 +1,147 @@ +/************************************************************** + Copyright (c) 2025 sanechips Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of sanechips Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/********************************************************************** + Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). 
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of ISCAS nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "riscv64_multibinary.h"
+
+extern void
+gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+                     unsigned char *dest);
+extern void
+gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
+                      unsigned char *dest);
+extern void
+gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+                unsigned char *dest);
+extern void
+gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
+                 unsigned char *dest);
+extern void
+ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
+                   unsigned char **coding);
+extern void
+ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
+                    unsigned char **dest);
+extern int
+gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
+extern int
+gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
+extern void
+ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+                          unsigned char *data, unsigned char **coding);
+extern void
+ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+                           unsigned char *data, unsigned char **coding);
+extern void
+ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls);
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
+{
+#if HAVE_RVV
+        unsigned long auxval = getauxval(AT_HWCAP);
+
+        if (auxval & HWCAP_RV('V'))
+                return gf_vect_dot_prod_rvv;
+#endif
+        return gf_vect_dot_prod_base;
+}
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
+{
+#if HAVE_RVV
+        unsigned long auxval = getauxval(AT_HWCAP);
+
+        if (auxval & HWCAP_RV('V'))
+                return gf_vect_mad_rvv;
+#endif
+        return gf_vect_mad_base;
+}
+
+DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
+{
+#if HAVE_RVV
+        unsigned long auxval = getauxval(AT_HWCAP);
+
+        if (auxval & HWCAP_RV('V'))
+                return ec_encode_data_rvv;
+#endif
+        return ec_encode_data_base;
+}
+
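For context, each dispatcher above falls back to the _base routine unless the running kernel advertises the vector extension through the auxiliary vector. A minimal standalone sketch of that probe, assuming HWCAP_RV() follows the usual Linux riscv convention of one bit per ISA letter (1UL << (letter - 'A')), which riscv64_multibinary.h is expected to provide:

#include <stdio.h>
#include <sys/auxv.h>

/* Assumption: same per-letter bit layout as the dispatcher's HWCAP_RV() macro. */
#ifndef HWCAP_RV
#define HWCAP_RV(letter) (1UL << ((letter) - 'A'))
#endif

int
main(void)
{
        unsigned long hwcap = getauxval(AT_HWCAP);

        /* Mirrors the check used by the dispatchers in this file. */
        printf("RVV kernels %s be selected\n",
               (hwcap & HWCAP_RV('V')) ? "would" : "would not");
        return 0;
}
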
+DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) +{ +#if HAVE_RVV + unsigned long auxval = getauxval(AT_HWCAP); + + if (auxval & HWCAP_RV('V')) + return ec_encode_data_update_rvv; +#endif + return ec_encode_data_update_base; +} + +DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) +{ +#if HAVE_RVV + unsigned long auxval = getauxval(AT_HWCAP); + + if (auxval & HWCAP_RV('V')) + return gf_vect_mul_rvv; +#endif + return gf_vect_mul_base; +} + +DEFINE_INTERFACE_DISPATCHER(ec_init_tables) { return ec_init_tables_base; } diff --git a/erasure_code/riscv64/ec_riscv64_highlevel_func.c b/erasure_code/riscv64/ec_riscv64_highlevel_func.c new file mode 100644 index 0000000..5d13319 --- /dev/null +++ b/erasure_code/riscv64/ec_riscv64_highlevel_func.c @@ -0,0 +1,188 @@ +/************************************************************** + Copyright (c) 2025 sanechips Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of sanechips Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#if HAVE_RVV +#include "erasure_code.h" + +/*external function*/ + +/* RVV */ +extern void +gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char *dest); +extern void +gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_8vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); +extern void +gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); +extern void +gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +extern void +gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +extern void +gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +extern void +gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +extern void +gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +void +ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows > 11) { + gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + + switch (rows) { + case 11: + /* 7 + 4 */ + gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding); + g_tbls += 7 * k * 32; + coding += 7; + gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 10: + /* 6 + 4 */ + gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 9: + /* 5 + 4 */ + gf_5vect_dot_prod_rvv(len, k, g_tbls, data, coding); + g_tbls += 5 * k * 32; + coding += 5; + gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 8: + /* 4 + 4 */ + gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); + g_tbls += 4 * k * 32; + coding += 4; + gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 7: + gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 6: + gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 5: + gf_5vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 4: + gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 3: + gf_3vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_rvv(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_rvv(len, k, g_tbls, data, *coding); + break; + default: + break; + 
} +} + +void +ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + while (rows > 6) { + gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_rvv(len, k, vec_i, g_tbls, data, *coding); + break; + default: + break; + } +} + +#endif diff --git a/erasure_code/riscv64/ec_table.S b/erasure_code/riscv64/ec_table.S deleted file mode 100644 index b0bec67..0000000 --- a/erasure_code/riscv64/ec_table.S +++ /dev/null @@ -1,88 +0,0 @@ -/********************************************************************** - Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of ISCAS nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ -// Reference: https://git.ffmpeg.org/gitweb/ffmpeg.git/commit/746f1ff36ac0d232687820fbde4e4efc79093af7 -.macro const sym, align=3, relocate=0 - .if \relocate - .pushsection .data.rel.ro - .else - .pushsection .rodata - .endif - .align \align - \sym: - - .macro endconst - .size \sym, . 
- \sym - .popsection - .purgem endconst - .endm -.endm - -const gff_base - .rept 2 - .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13 - .byte 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30 - .byte 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee - .byte 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2 - .byte 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89 - .byte 0x0f, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1 - .byte 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d - .byte 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93 - .byte 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda - .byte 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4 - .byte 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6 - .byte 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b - .byte 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32 - .byte 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2 - .byte 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, 0x12 - .byte 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16 - .byte 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e - .endr - .byte 0x01 -endconst - -const gflog_base - .byte 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7 - .byte 0x4b, 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08 - .byte 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0 - .byte 0x12, 0x82, 0x45, 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78 - .byte 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25 - .byte 0xb3, 0x10, 0x91, 0x22, 0x88, 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2 - .byte 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b - .byte 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b - .byte 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63 - .byte 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8 - .byte 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, 0x37, 0x3f, 0xd1, 0x5b, 0x95 - .byte 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab - .byte 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, 0x1f, 0x2d, 0x43 - .byte 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1 - .byte 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, 0xcb - .byte 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7 - .byte 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58 - .byte 0xaf -endconst diff --git a/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S 
b/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S new file mode 100644 index 0000000..1f01595 --- /dev/null +++ b/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S @@ -0,0 +1,161 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_2vect_dot_prod_rvv +.type gf_2vect_dot_prod_rvv, @function + +/* void gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + */ + +/* arguments */ +#define x_len a0 /* vector length */ +#define x_vec a1 /* number of source vectors (ie. data blocks) */ +#define x_tbl a2 +#define x_src a3 +#define x_dest a4 + +/* local variables */ +#define x_vec_i t0 +#define x_ptr t1 +#define x_pos t2 + +#define x_tbl1 t3 +#define x_tbl2 t4 +#define x_dest1 t5 +#define x_dest2 a7 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 + +#define v_dest1 v4 + +#define v_gft1_lo v5 +#define v_gft1_hi v6 + +#define v_gft2_lo v7 +#define v_gft2_hi v8 +#define v_dest2 v9 + +gf_2vect_dot_prod_rvv: + /* less than 16 bytes, return_fail */ + li t6, 16 + blt x_len, t6, .return_fail + + vsetvli a5, x0, e8, m1 /* Set vector length to maximum */ + + li x_pos, 0 + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + +/* Loop 1: x_len, vector length */ +.Llooprvv_vl: + bge x_pos, x_len, .return_pass + + li x_vec_i, 0 /* clear x_vec_i */ + ld x_ptr, 0(x_src) /* x_ptr: src base addr. */ + + vmv.v.i v_dest1, 0 /* clear v_dest1 */ + vmv.v.i v_dest2, 0 /* clear v_dest2 */ + + /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */ + mv x_tbl1, x_tbl /* reset x_tbl1 */ + slli t6, x_vec, 5 + add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */ + +/* Loop 2: x_vec, number of source vectors (ie. 
data blocks) */ +.Llooprvv_vl_vects: + /* load src data */ + slli a6, x_vec_i, 3 + add a6,x_src,a6 + ld x_ptr, 0(a6) + add x_ptr,x_ptr,x_pos + + vle8.v v_src, (x_ptr) /* load from: src base + pos offset */ + /* split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* gf_tbl addr: (x_tbl + dest_idx * x_vec * 32) + src_vec_idx * 32 */ + /* load gf_table's */ + vle8.v v_gft1_lo, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft1_hi, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + + vle8.v v_gft2_lo, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + vle8.v v_gft2_hi, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + + /* dest 1 */ + /* table indexing, ie. gf(2^8) multiplication */ + vrgather.vv v26, v_gft1_lo, v_src_lo + vrgather.vv v27, v_gft1_hi, v_src_hi + /* exclusive or, ie. gf(2^8) add */ + vxor.vv v_dest1, v_dest1, v26 + vxor.vv v_dest1, v_dest1, v27 + + /* dest 2 */ + vrgather.vv v26, v_gft2_lo, v_src_lo + vrgather.vv v27, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_dest2, v26 + vxor.vv v_dest2, v_dest2, v27 + + /* calc for next */ + addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */ + blt x_vec_i, x_vec, .Llooprvv_vl_vects +/* end of Loop 2 */ + + /* store dest data */ + vse8.v v_dest1, (x_dest1) + vse8.v v_dest2, (x_dest2) + add x_dest1,x_dest1,a5 + add x_dest2,x_dest2,a5 + + /* increment one vector length */ + add x_pos, x_pos, a5 + j .Llooprvv_vl +/* end of Loop 1 */ + +.return_pass: + li a0, 0 + ret + +.return_fail: + li a0, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_2vect_mad_rvv.S b/erasure_code/riscv64/gf_2vect_mad_rvv.S new file mode 100644 index 0000000..fb90f3a --- /dev/null +++ b/erasure_code/riscv64/gf_2vect_mad_rvv.S @@ -0,0 +1,148 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_2vect_mad_rvv +.type gf_2vect_mad_rvv, @function + +/* gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + */ +/* arguments */ +#define x_len a0 +#define x_vec a1 +#define x_vec_i a2 +#define x_tbl a3 +#define x_src a4 +#define x_dest a5 + +/* returns */ +#define w_ret a0 + +/* local variables */ +#define x_pos t0 +#define x_dest2 t1 +#define x_dest1 t2 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v1 +#define v_dest1 v3 +#define v_tmp_lo v4 +#define v_tmp_hi v5 +#define v_gft1_lo v6 +#define v_gft1_hi v7 +#define v_gft2_lo v17 +#define v_gft2_hi v18 +#define v_dest2 v27 + +gf_2vect_mad_rvv: + /* less than 16 bytes, return_fail */ + li t3, 16 + blt x_len, t3, .return_fail + + vsetvli t4, x0, e8, m1 + + /* load table 1 */ + slli t3, x_vec_i, 5 + add x_tbl, x_tbl, t3 + vle8.v v_gft1_lo, (x_tbl) + addi t3, x_tbl, 16 + vle8.v v_gft1_hi, (t3) + + /* load table 2 */ + slli t3, x_vec, 5 + add x_tbl, x_tbl, t3 + vle8.v v_gft2_lo, (x_tbl) + addi t3, x_tbl, 16 + vle8.v v_gft2_hi, (t3) + + /* load dest pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + + li x_pos, 0 + +.Llooprvv_vl: + blt x_pos, x_len, .Lloop_body + j .return_pass + +.Lloop_body: + /* load src data */ + add t3, x_src, x_pos + vle8.v v_src, (t3) + + /* split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load dest data */ + add t3, x_dest1, x_pos + vle8.v v_dest1, (t3) + add t3, x_dest2, x_pos + vle8.v v_dest2, (t3) + + /* dest1 */ + /* table indexing, ie. gf(2^8) multiplication */ + vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi + /* exclusive or, ie. gf(2^8) add */ + vxor.vv v_dest1, v_tmp_lo, v_dest1 + vxor.vv v_dest1, v_tmp_hi, v_dest1 + + /* dest2 */ + vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_tmp_lo, v_dest2 + vxor.vv v_dest2, v_tmp_hi, v_dest2 + + /* store dest data */ + add t3, x_dest1, x_pos + vse8.v v_dest1, (t3) + add t3, x_dest2, x_pos + vse8.v v_dest2, (t3) + + /* increment one vector length */ + add x_pos, x_pos, t4 + + j .Llooprvv_vl + +.return_pass: + li w_ret, 0 + ret + +.return_fail: + li w_ret, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S new file mode 100644 index 0000000..c617ab3 --- /dev/null +++ b/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S @@ -0,0 +1,188 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_3vect_dot_prod_rvv +.type gf_3vect_dot_prod_rvv, @function + +/* void gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + */ + +/* arguments */ +#define x_len a0 /* vector length */ +#define x_vec a1 /* number of source vectors (ie. data blocks) */ +#define x_tbl a2 /* gftbls */ +#define x_src a3 /* src */ +#define x_dest a4 /* dest */ + +/* local variables */ +#define x_vec_i t1 +#define x_ptr t2 +#define x_pos t3 +#define x_tbl1 t4 +#define x_tbl2 t5 +#define x_tbl3 t6 +#define x_dest1 s0 +#define x_dest2 s1 +#define x_dest3 a5 +#define t_offset a6 + + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 + +#define v_dest1 v4 +#define v_dest2 v5 +#define v_dest3 v6 + +#define v_gft1_lo v8 +#define v_gft1_hi v9 +#define v_gft2_lo v10 +#define v_gft2_hi v11 +#define v_gft3_lo v12 +#define v_gft3_hi v13 + +gf_3vect_dot_prod_rvv: + /* less than 16 bytes, return_fail */ + li t0, 16 + blt x_len, t0, .return_fail + + /* save callee-saved registers */ + addi sp, sp, -16 + sd s0, 0(sp) + sd s1, 8(sp) + + vsetvli a7, x0, e8, m1 /* Set vector length to maximum */ + + li x_pos, 0 + slli t_offset, x_vec, 5 + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + +.Lloop_rvv_vl: + /* check if we have processed all elements */ + bge x_pos, x_len, .return_pass + + /* Clear destination vectors */ + vmv.v.i v_dest1, 0 + vmv.v.i v_dest2, 0 + vmv.v.i v_dest3, 0 + + /* Reset table pointers */ + mv x_tbl1, x_tbl + add x_tbl2, x_tbl1, t_offset + add x_tbl3, x_tbl2, t_offset + + /* Loop 2: x_vec, number of source vectors (ie. 
data blocks) */ + li x_vec_i, 0 +.Lloop_rvv_vl_vects: + /* Load source data */ + slli t0, x_vec_i, 3 + add t0,x_src,t0 + ld x_ptr, 0(t0) + add x_ptr,x_ptr,x_pos + + vle8.v v_src, (x_ptr) + + /* Split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* Load gf_table's */ + vle8.v v_gft1_lo, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft1_hi, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft2_lo, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + vle8.v v_gft2_hi, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + + + /* Load next gf_table's */ + vle8.v v_gft3_lo, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + vle8.v v_gft3_hi, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + +/* dest 1 */ + vrgather.vv v26, v_gft1_lo, v_src_lo + vrgather.vv v27, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_dest1, v26 + vxor.vv v_dest1, v_dest1, v27 + + /* dest 2 */ + vrgather.vv v26, v_gft2_lo, v_src_lo + vrgather.vv v27, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_dest2, v26 + vxor.vv v_dest2, v_dest2, v27 + + /* dest 3 */ + vrgather.vv v26, v_gft3_lo, v_src_lo + vrgather.vv v27, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_dest3, v26 + vxor.vv v_dest3, v_dest3, v27 + + /* Move to next source vector */ + addi x_vec_i, x_vec_i, 1 + + /* Check if we have processed all vectors */ + blt x_vec_i, x_vec, .Lloop_rvv_vl_vects + + /* Store destination data */ + vse8.v v_dest1, (x_dest1) + vse8.v v_dest2, (x_dest2) + vse8.v v_dest3, (x_dest3) + add x_dest1,x_dest1, a7 + add x_dest2,x_dest2, a7 + add x_dest3,x_dest3, a7 + + add x_pos, x_pos, a7 + j .Lloop_rvv_vl + +.return_pass: + ld s0, 0(sp) + ld s1, 8(sp) + addi sp, sp, 16 + + li a0, 0 + ret + +.return_fail: + li a0, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_3vect_mad_rvv.S b/erasure_code/riscv64/gf_3vect_mad_rvv.S new file mode 100644 index 0000000..8d33471 --- /dev/null +++ b/erasure_code/riscv64/gf_3vect_mad_rvv.S @@ -0,0 +1,170 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_3vect_mad_rvv +.type gf_3vect_mad_rvv, @function + +/* gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + */ +/* arguments */ +#define x_len a0 +#define x_vec a1 +#define x_vec_i a2 +#define x_tbl a3 +#define x_src a4 +#define x_dest a5 + +/* returns */ +#define w_ret a0 + +/* local variables */ +#define x_pos t0 +#define x_dest1 t1 +#define x_dest2 t2 +#define x_dest3 t3 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest1 v4 +#define v_tmp_lo v5 +#define v_tmp_hi v6 +#define v_gft1_lo v7 +#define v_gft1_hi v8 +#define v_gft2_lo v9 +#define v_gft2_hi v10 +#define v_gft3_lo v11 +#define v_gft3_hi v12 +#define v_dest2 v19 +#define v_dest3 v20 + +gf_3vect_mad_rvv: + /* less than 16 bytes, return_fail */ + li t4, 16 + blt x_len, t4, .return_fail + + vsetvli t5, x0, e8, m1 + + /* Load table 1 */ + slli t4, x_vec_i, 5 + add x_tbl, x_tbl, t4 + vle8.v v_gft1_lo, (x_tbl) + addi t4, x_tbl, 16 + vle8.v v_gft1_hi, (t4) + + /* Load table 2 */ + slli t4, x_vec, 5 + add x_tbl, x_tbl, t4 + vle8.v v_gft2_lo, (x_tbl) + addi t4, x_tbl, 16 + vle8.v v_gft2_hi, (t4) + + /* Load table 3 */ + slli t4, x_vec, 5 + add x_tbl, x_tbl, t4 + vle8.v v_gft3_lo, (x_tbl) + addi t4, x_tbl, 16 + vle8.v v_gft3_hi, (t4) + + + /* Load destination pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + + li x_pos, 0 + +.Llooprvv_vl: + blt x_pos, x_len, .Lloop_body + j .return_pass + +.Lloop_body: + /* Load source data */ + add t6, x_src, x_pos + vle8.v v_src, (t6) + + /* Split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load dest data */ + add t6, x_dest1, x_pos + vle8.v v_dest1, (t6) + add t6, x_dest2, x_pos + vle8.v v_dest2, (t6) + add t6, x_dest3, x_pos + vle8.v v_dest3, (t6) + + /* dest1 */ + vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_tmp_lo, v_dest1 + vxor.vv v_dest1, v_tmp_hi, v_dest1 + + /* dest2 */ + vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_tmp_lo, v_dest2 + vxor.vv v_dest2, v_tmp_hi, v_dest2 + + /* dest3 */ + vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_tmp_lo, v_dest3 + vxor.vv v_dest3, v_tmp_hi, v_dest3 + + + /* Store destination data */ + add t6, x_dest1, x_pos + vse8.v v_dest1, (t6) + add t6, x_dest2, x_pos + vse8.v v_dest2, (t6) + add t6, x_dest3, x_pos + vse8.v v_dest3, (t6) + + /* Increment position */ + add x_pos, x_pos, t5 + + j .Llooprvv_vl + +.return_pass: + li w_ret, 0 + ret + +.return_fail: + li w_ret, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S new file mode 100644 index 0000000..ace146d --- /dev/null +++ b/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S @@ -0,0 +1,214 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
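The *_mad_rvv kernels, the first of which (gf_3vect_mad_rvv) appears above, update existing parity in place with the contribution of a single source block vec_i instead of recomputing the full dot product. A scalar sketch of that update, matching the pointer arithmetic in the assembly (vec_i * 32 for the first row, then vec * 32 per additional row):

#include <stddef.h>

/*
 * Scalar sketch of the *_mad_rvv update for `rows` parity blocks: the table
 * for row r and source vec_i starts at gftbls + (r * vec + vec_i) * 32,
 * which is the slli/add sequence used when loading table 1, 2, 3, ...
 */
static void
gf_nvect_mad_ref(int len, int vec, int vec_i, const unsigned char *gftbls,
                 const unsigned char *src, unsigned char **dest, int rows)
{
        for (int r = 0; r < rows; r++) {
                const unsigned char *tbl =
                        gftbls + 32 * (size_t) (r * vec + vec_i);

                for (int pos = 0; pos < len; pos++) {
                        unsigned char b = src[pos];

                        dest[r][pos] ^= tbl[b & 0x0F] ^ tbl[16 + (b >> 4)];
                }
        }
}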
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_4vect_dot_prod_rvv +.type gf_4vect_dot_prod_rvv, @function + +/* void gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + */ + +/* arguments */ +#define x_len a0 /* vector length */ +#define x_vec a1 /* number of source vectors (ie. data blocks) */ +#define x_tbl a2 +#define x_src a3 +#define x_dest a4 + +/* local variables */ +#define x_vec_i a7 +#define x_ptr t1 +#define x_pos t2 + +#define x_tbl1 t3 +#define x_tbl2 t4 +#define x_tbl3 t5 +#define x_tbl4 t6 +#define x_dest1 s0 +#define x_dest2 s1 +#define x_dest3 s2 +#define x_dest4 s3 +#define t_offset a5 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 + +#define v_dest1 v4 +#define v_dest2 v5 +#define v_dest3 v6 +#define v_dest4 v7 + +#define v_gft1_lo v8 +#define v_gft1_hi v9 +#define v_gft2_lo v10 +#define v_gft2_hi v11 +#define v_gft3_lo v12 +#define v_gft3_hi v13 +#define v_gft4_lo v14 +#define v_gft4_hi v15 + +gf_4vect_dot_prod_rvv: + /* less than 16 bytes, return_fail */ + li t0, 16 + blt x_len, t0, .return_fail + +/* save callee-saved registers */ + addi sp, sp, -32 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + + vsetvli t0, x0, e8, m1 /* Set vector length to maximum */ + + li x_pos, 0 + slli t_offset, x_vec, 5 + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + +/* Loop 1: x_len, vector length */ +.Lloop_rvv_vl: + /* check if we have processed all elements */ + bge x_pos, x_len, .return_pass + + /* Clear destination vectors */ + vmv.v.i v_dest1, 0 + vmv.v.i v_dest2, 0 + vmv.v.i v_dest3, 0 + vmv.v.i v_dest4, 0 + + /* Reset table pointers */ + mv x_tbl1, x_tbl + add x_tbl2, x_tbl1, t_offset + add x_tbl3, x_tbl2, t_offset + add x_tbl4, x_tbl3, t_offset + + /* Loop 2: x_vec, number of source vectors (ie. 
data blocks) */ + li x_vec_i, 0 +.Lloop_rvv_vl_vects: + /* Load source data */ + slli a6, x_vec_i, 3 + add a6,x_src,a6 + ld x_ptr, 0(a6) + add x_ptr,x_ptr,x_pos + + vle8.v v_src, (x_ptr) + + /* Split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* Load gf_table's */ + vle8.v v_gft1_lo, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft1_hi, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft2_lo, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + vle8.v v_gft2_hi, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + + + /* Load next gf_table's */ + vle8.v v_gft3_lo, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + vle8.v v_gft3_hi, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + + vle8.v v_gft4_lo, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + vle8.v v_gft4_hi, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + + /* dest 1 */ + vrgather.vv v26, v_gft1_lo, v_src_lo + vrgather.vv v27, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_dest1, v26 + vxor.vv v_dest1, v_dest1, v27 + + /* dest 2 */ + vrgather.vv v26, v_gft2_lo, v_src_lo + vrgather.vv v27, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_dest2, v26 + vxor.vv v_dest2, v_dest2, v27 + + /* dest 3 */ + vrgather.vv v26, v_gft3_lo, v_src_lo + vrgather.vv v27, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_dest3, v26 + vxor.vv v_dest3, v_dest3, v27 + + /* dest 4 */ + vrgather.vv v26, v_gft4_lo, v_src_lo + vrgather.vv v27, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_dest4, v26 + vxor.vv v_dest4, v_dest4, v27 + + /* Move to next source vector */ + addi x_vec_i, x_vec_i, 1 + + /* Check if we have processed all vectors */ + blt x_vec_i, x_vec, .Lloop_rvv_vl_vects + + /* Store destination data */ + vse8.v v_dest1, (x_dest1) + vse8.v v_dest2, (x_dest2) + vse8.v v_dest3, (x_dest3) + vse8.v v_dest4, (x_dest4) + add x_dest1,x_dest1, t0 + add x_dest2,x_dest2, t0 + add x_dest3,x_dest3, t0 + add x_dest4,x_dest4, t0 + /* Increment position */ + add x_pos, x_pos, t0 + j .Lloop_rvv_vl + +.return_pass: +/* restore callee-saved registers */ + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + addi sp, sp, 32 + li a0, 0 + ret + +.return_fail: + li a0, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_4vect_mad_rvv.S b/erasure_code/riscv64/gf_4vect_mad_rvv.S new file mode 100644 index 0000000..48b35ea --- /dev/null +++ b/erasure_code/riscv64/gf_4vect_mad_rvv.S @@ -0,0 +1,189 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_4vect_mad_rvv +.type gf_4vect_mad_rvv, @function + +/* gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + */ +/* arguments */ +#define x_len a0 +#define x_vec a1 +#define x_vec_i a2 +#define x_tbl a3 +#define x_src a4 +#define x_dest a5 + +/* returns */ +#define w_ret a0 + +/* local variables */ +#define x_pos t0 +#define x_dest1 t1 +#define x_dest2 t2 +#define x_dest3 t3 +#define x_dest4 t4 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest1 v4 +#define v_tmp_lo v5 +#define v_tmp_hi v6 +#define v_gft1_lo v7 +#define v_gft1_hi v8 +#define v_gft2_lo v9 +#define v_gft2_hi v10 +#define v_gft3_lo v11 +#define v_gft3_hi v12 +#define v_gft4_lo v13 +#define v_gft4_hi v14 +#define v_dest2 v15 +#define v_dest3 v16 +#define v_dest4 v17 + +gf_4vect_mad_rvv: + /* less than 16 bytes, return_fail */ + li t5, 16 + blt x_len, t5, .return_fail + + vsetvli t6, x0, e8, m1 + + /* load table 1 */ + slli t5, x_vec_i, 5 + add x_tbl, x_tbl, t5 + vle8.v v_gft1_lo, (x_tbl) + addi t5, x_tbl, 16 + vle8.v v_gft1_hi, (t5) + + /* load table 2 */ + slli t5, x_vec, 5 + add x_tbl, x_tbl, t5 + vle8.v v_gft2_lo, (x_tbl) + addi t5, x_tbl, 16 + vle8.v v_gft2_hi, (t5) + + /* load table 3 */ + slli t5, x_vec, 5 + add x_tbl, x_tbl, t5 + vle8.v v_gft3_lo, (x_tbl) + addi t5, x_tbl, 16 + vle8.v v_gft3_hi, (t5) + + /* load table 4 */ + slli t5, x_vec, 5 + add x_tbl, x_tbl, t5 + vle8.v v_gft4_lo, (x_tbl) + addi t5, x_tbl, 16 + vle8.v v_gft4_hi, (t5) + + /* load dest pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + + li x_pos, 0 + +.Llooprvv_vl: + blt x_pos, x_len, .Lloop_body + j .return_pass +.Lloop_body: + /* load src data */ + add t5, x_src, x_pos + vle8.v v_src, (t5) + + /* split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load dest data */ + add t5, x_dest1, x_pos + vle8.v v_dest1, (t5) + add t5, x_dest2, x_pos + vle8.v v_dest2, (t5) + add t5, x_dest3, x_pos + vle8.v v_dest3, (t5) + add t5, x_dest4, x_pos + vle8.v v_dest4, (t5) + + /* dest1 */ + /* table indexing, ie. gf(2^8) multiplication */ + vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi + /* exclusive or, ie. 
gf(2^8) add */ + vxor.vv v_dest1, v_tmp_lo, v_dest1 + vxor.vv v_dest1, v_tmp_hi, v_dest1 + + /* dest2 */ + vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_tmp_lo, v_dest2 + vxor.vv v_dest2, v_tmp_hi, v_dest2 + + /* dest3 */ + vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_tmp_lo, v_dest3 + vxor.vv v_dest3, v_tmp_hi, v_dest3 + + /* dest4 */ + vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_tmp_lo, v_dest4 + vxor.vv v_dest4, v_tmp_hi, v_dest4 + + /* store dest data */ + add t5, x_dest1, x_pos + vse8.v v_dest1, (t5) + add t5, x_dest2, x_pos + vse8.v v_dest2, (t5) + add t5, x_dest3, x_pos + vse8.v v_dest3, (t5) + add t5, x_dest4, x_pos + vse8.v v_dest4, (t5) + + add x_pos, x_pos, t6 + j .Llooprvv_vl + +.return_pass: + li w_ret, 0 + ret + +.return_fail: + li w_ret, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S new file mode 100644 index 0000000..0b5cf3e --- /dev/null +++ b/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S @@ -0,0 +1,242 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_5vect_dot_prod_rvv +.type gf_5vect_dot_prod_rvv, @function + +/* void gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + */ + +/* arguments */ +#define x_len a0 /* vector length */ +#define x_vec a1 /* number of source vectors (ie. 
data blocks) */ +#define x_tbl a2 +#define x_src a3 +#define x_dest a4 + +/* local variables */ +#define x_vec_i a7 +#define x_ptr t1 +#define x_pos t2 +#define x_tbl1 t3 +#define x_tbl2 t4 +#define x_tbl3 t5 +#define x_tbl4 t6 + +#define x_tbl5 s0 +#define x_dest1 s1 +#define x_dest2 s2 +#define x_dest3 s3 +#define x_dest4 s4 +#define x_dest5 s5 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest1 v4 +#define v_gft1_lo v5 +#define v_gft1_hi v6 +#define v_gft2_lo v7 +#define v_gft2_hi v8 +#define v_gft3_lo v9 +#define v_gft3_hi v10 +#define v_gft4_lo v11 +#define v_gft4_hi v12 +#define v_gft5_lo v13 +#define v_gft5_hi v14 +#define v_dest2 v15 +#define v_dest3 v16 +#define v_dest4 v17 +#define v_dest5 v18 + +gf_5vect_dot_prod_rvv: + /* less than 16 bytes, return_fail */ + li t0, 16 + blt x_len, t0, .return_fail + + /* save s0-s4 */ + addi sp, sp, -48 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + + vsetvli a5, x0, e8, m1 + + /* Initialize position */ + li x_pos, 0 + + /* Load destination pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + ld x_dest5, 32(x_dest) + +/* Loop 1: x_len, vector length */ +.Llooprvv_vl: + bge x_pos, x_len, .return_pass + + /* Clear destination vectors */ + vmv.v.i v_dest1, 0 + vmv.v.i v_dest2, 0 + vmv.v.i v_dest3, 0 + vmv.v.i v_dest4, 0 + vmv.v.i v_dest5, 0 + + /* Reset table pointers */ + mv x_tbl1, x_tbl + slli t0, x_vec, 5 + add x_tbl2, x_tbl1, t0 + add x_tbl3, x_tbl2, t0 + add x_tbl4, x_tbl3, t0 + add x_tbl5, x_tbl4, t0 + + /* Loop 2: x_vec, number of source vectors (ie. data blocks) */ + li x_vec_i, 0 +.Llooprvv_vl_vects: + /* Load source data */ + slli a6, x_vec_i, 3 + add a6,x_src,a6 + ld x_ptr, 0(a6) + add x_ptr, x_ptr, x_pos + vle8.v v_src, (x_ptr) + + /* Split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* Load gf_table's */ + vle8.v v_gft1_lo, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft1_hi, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + + vle8.v v_gft2_lo, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + vle8.v v_gft2_hi, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + + /* Move to next source vector */ + addi x_vec_i, x_vec_i, 1 + + /* dest 1 */ + vrgather.vv v26, v_gft1_lo, v_src_lo + vrgather.vv v27, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_dest1, v26 + vxor.vv v_dest1, v_dest1, v27 + + /* Load more gf_table's */ + vle8.v v_gft3_lo, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + vle8.v v_gft3_hi, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + + vle8.v v_gft4_lo, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + vle8.v v_gft4_hi, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + + /* dest 2 */ + vrgather.vv v26, v_gft2_lo, v_src_lo + vrgather.vv v27, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_dest2, v26 + vxor.vv v_dest2, v_dest2, v27 + + /* dest 3 */ + vrgather.vv v26, v_gft3_lo, v_src_lo + vrgather.vv v27, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_dest3, v26 + vxor.vv v_dest3, v_dest3, v27 + + /* Load more gf_table's */ + vle8.v v_gft5_lo, (x_tbl5) + addi x_tbl5, x_tbl5, 16 + vle8.v v_gft5_hi, (x_tbl5) + addi x_tbl5, x_tbl5, 16 + + /* dest 4 */ + vrgather.vv v26, v_gft4_lo, v_src_lo + vrgather.vv v27, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_dest4, v26 + vxor.vv v_dest4, v_dest4, v27 + + /* dest 5 */ + vrgather.vv v26, v_gft5_lo, v_src_lo + vrgather.vv v27, v_gft5_hi, v_src_hi + vxor.vv v_dest5, v_dest5, v26 + vxor.vv v_dest5, v_dest5, v27 + + /* Check if we have processed all vectors */ + blt x_vec_i, x_vec, .Llooprvv_vl_vects + + + 
vse8.v v_dest1, (x_dest1) + vse8.v v_dest2, (x_dest2) + vse8.v v_dest3, (x_dest3) + vse8.v v_dest4, (x_dest4) + vse8.v v_dest5, (x_dest5) + + /* Store destination data */ + add x_dest1,x_dest1,a5 + add x_dest2,x_dest2,a5 + add x_dest3,x_dest3,a5 + add x_dest4,x_dest4,a5 + add x_dest5,x_dest5,a5 + + /* Increment position */ + add x_pos, x_pos, a5 + j .Llooprvv_vl + +.return_pass: + /* Restore callee-saved registers */ + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + addi sp, sp, 48 + + li a0, 0 + ret + +.return_fail: + li a0, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_5vect_mad_rvv.S b/erasure_code/riscv64/gf_5vect_mad_rvv.S new file mode 100644 index 0000000..57227ed --- /dev/null +++ b/erasure_code/riscv64/gf_5vect_mad_rvv.S @@ -0,0 +1,214 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
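Every loop in this patch sets vl once with vsetvli rd, x0, e8, m1 (requesting VLMAX) and then advances the position, source and destination pointers by that full vector length each pass; there is no masked tail iteration. Lengths that are not a multiple of the hardware vector length therefore appear to be over-read and over-written on the final pass, so callers are expected to provide suitably padded buffers. A caller-side helper of the kind one might use (hypothetical, not part of this patch):

#include <stddef.h>

/*
 * Hypothetical caller-side helper: round a buffer length up to a multiple
 * of the vector-register byte width so the tail-less RVV loops above never
 * step past the allocation.
 */
static size_t
round_up_to_vlen(size_t len, size_t vlen_bytes)
{
        return (len + vlen_bytes - 1) / vlen_bytes * vlen_bytes;
}

For example, with VLEN = 128 the e8/m1 vector length is 16 bytes, so a 100-byte buffer would be padded to round_up_to_vlen(100, 16) = 112 bytes.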
+######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_5vect_mad_rvv +.type gf_5vect_mad_rvv, @function + +/* gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + */ +/* arguments */ +#define x_len a0 +#define x_vec a1 +#define x_vec_i a2 +#define x_tbl a3 +#define x_src a4 +#define x_dest a5 + +/* returns */ +#define w_ret a0 + +/* local variables */ +#define x_pos t0 +#define x_dest1 t1 +#define x_dest2 t2 +#define x_dest3 t3 +#define x_dest4 t4 +#define x_dest5 t5 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest1 v4 +#define v_tmp_lo v5 +#define v_tmp_hi v6 +#define v_gft1_lo v7 +#define v_gft1_hi v8 +#define v_gft2_lo v9 +#define v_gft2_hi v10 +#define v_gft3_lo v11 +#define v_gft3_hi v12 +#define v_gft4_lo v13 +#define v_gft4_hi v14 +#define v_gft5_lo v15 +#define v_gft5_hi v16 +#define v_dest2 v19 +#define v_dest3 v20 +#define v_dest4 v21 +#define v_dest5 v22 + +gf_5vect_mad_rvv: + /* less than 16 bytes, return_fail */ + li t6, 16 + blt x_len, t6, .return_fail + + vsetvli a7, x0, e8, m1 + + /* Load table 1 */ + slli a6, x_vec_i, 5 + add x_tbl, x_tbl, a6 + vle8.v v_gft1_lo, (x_tbl) + addi a6, x_tbl, 16 + vle8.v v_gft1_hi, (a6) + + /* Load table 2 */ + slli a6, x_vec, 5 + add x_tbl, x_tbl, a6 + vle8.v v_gft2_lo, (x_tbl) + addi a6, x_tbl, 16 + vle8.v v_gft2_hi, (a6) + + /* Load table 3 */ + slli a6, x_vec, 5 + add x_tbl, x_tbl, a6 + vle8.v v_gft3_lo, (x_tbl) + addi a6, x_tbl, 16 + vle8.v v_gft3_hi, (a6) + + /* Load table 4 */ + slli a6, x_vec, 5 + add x_tbl, x_tbl, a6 + vle8.v v_gft4_lo, (x_tbl) + addi a6, x_tbl, 16 + vle8.v v_gft4_hi, (a6) + + /* Load table 5 */ + slli a6, x_vec, 5 + add x_tbl, x_tbl, a6 + vle8.v v_gft5_lo, (x_tbl) + addi a6, x_tbl, 16 + vle8.v v_gft5_hi, (a6) + + + /* Load destination pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + ld x_dest5, 32(x_dest) + + li x_pos, 0 + +.Llooprvv_vl: + blt x_pos, x_len, .Lloop_body + j .return_pass + +.Lloop_body: + /* Load source data */ + add t6, x_src, x_pos + vle8.v v_src, (t6) + + /* Split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load dest data */ + add t6, x_dest1, x_pos + vle8.v v_dest1, (t6) + add t6, x_dest2, x_pos + vle8.v v_dest2, (t6) + add t6, x_dest3, x_pos + vle8.v v_dest3, (t6) + add t6, x_dest4, x_pos + vle8.v v_dest4, (t6) + add t6, x_dest5, x_pos + vle8.v v_dest5, (t6) + + /* dest1 */ + vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_tmp_lo, v_dest1 + vxor.vv v_dest1, v_tmp_hi, v_dest1 + + /* dest2 */ + vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_tmp_lo, v_dest2 + vxor.vv v_dest2, v_tmp_hi, v_dest2 + + /* dest3 */ + vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_tmp_lo, v_dest3 + vxor.vv v_dest3, v_tmp_hi, v_dest3 + + /* dest4 */ + vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_tmp_lo, v_dest4 + vxor.vv v_dest4, v_tmp_hi, v_dest4 + + /* dest5 */ + vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi + vxor.vv v_dest5, v_tmp_lo, v_dest5 + vxor.vv v_dest5, v_tmp_hi, v_dest5 + + + /* Store destination data */ + add t6, x_dest1, x_pos + vse8.v v_dest1, (t6) + add t6, 
x_dest2, x_pos + vse8.v v_dest2, (t6) + add t6, x_dest3, x_pos + vse8.v v_dest3, (t6) + add t6, x_dest4, x_pos + vse8.v v_dest4, (t6) + add t6, x_dest5, x_pos + vse8.v v_dest5, (t6) + + /* Increment position */ + add x_pos, x_pos, a7 + + j .Llooprvv_vl + +.return_pass: + li w_ret, 0 + ret + +.return_fail: + li w_ret, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S new file mode 100644 index 0000000..6cc9a16 --- /dev/null +++ b/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S @@ -0,0 +1,273 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_6vect_dot_prod_rvv +.type gf_6vect_dot_prod_rvv, @function + +/* void gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + */ + +/* arguments */ +#define x_len a0 /* vector length */ +#define x_vec a1 /* number of source vectors (ie. 
data blocks) */ +#define x_tbl a2 /* gftbls */ +#define x_src a3 /* src */ +#define x_dest a4 /* dest */ + +/* local variables */ +#define x_vec_i a7 /* loop counter for vectors */ +#define x_ptr t1 /* pointer to current src */ +#define x_pos t2 /* position in vector */ +#define x_tbl1 t3 /* table pointer 1 */ +#define x_tbl2 t4 /* table pointer 2 */ +#define x_tbl3 t5 /* table pointer 3 */ +#define x_tbl4 t6 /* table pointer 4 */ +#define x_tbl5 s0 /* table pointer 5 */ +#define x_tbl6 s1 /* table pointer 6 */ +#define x_dest1 s2 /* dest pointer 1 */ +#define x_dest2 s3 /* dest pointer 2 */ +#define x_dest3 s4 /* dest pointer 3 */ +#define x_dest4 s5 /* dest pointer 4 t12 -- x28 */ +#define x_dest5 s6 /* dest pointer 5 */ +#define x_dest6 s7 /* dest pointer 6 */ + +/* vector registers */ +#define v_src v1 /* source vector */ +#define v_src_lo v2 /* low 4 bits of source */ +#define v_src_hi v3 /* high 4 bits of source */ +#define v_dest1 v4 /* destination vector 1 */ +#define v_dest2 v5 /* destination vector 2 */ +#define v_dest3 v6 /* destination vector 3 */ +#define v_dest4 v7 /* destination vector 4 */ +#define v_dest5 v8 /* destination vector 5 */ +#define v_dest6 v9 /* destination vector 6 */ +#define v_gft1_lo v10 /* gf table 1 low */ +#define v_gft1_hi v11 /* gf table 1 high */ +#define v_gft2_lo v12 /* gf table 2 low */ +#define v_gft2_hi v13 /* gf table 2 high */ +#define v_gft3_lo v14 /* gf table 3 low */ +#define v_gft3_hi v15 /* gf table 3 high */ +#define v_gft4_lo v16 /* gf table 4 low */ +#define v_gft4_hi v17 /* gf table 4 high */ +#define v_gft5_lo v18 /* gf table 5 low */ +#define v_gft5_hi v19 /* gf table 5 high */ +#define v_gft6_lo v20 /* gf table 6 low */ +#define v_gft6_hi v21 /* gf table 6 high */ + +gf_6vect_dot_prod_rvv: + /* less than 16 bytes, return_fail */ + li t0, 16 + blt x_len, t0, .return_fail + + /* save callee-saved registers */ + addi sp, sp, -64 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + + li t0, 0x0F + vsetvli a5, x0, e8, m1 + + /* initialize position */ + li x_pos, 0 + + /* load destination pointers */ + ld x_dest1, 0(x14) # a4 is also x14 + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + ld x_dest5, 32(x_dest) + ld x_dest6, 40(x_dest) + +.Llooprvv_vl: + /* check if we have processed all elements */ + bge x_pos, x_len, .return_pass + + /* initialize vector loop counter */ + li x_vec_i, 0 + + /* load source pointer */ + ld x_ptr, 0(x_src) + + /* clear destination vectors */ + vmv.v.i v_dest1, 0 + vmv.v.i v_dest2, 0 + vmv.v.i v_dest3, 0 + vmv.v.i v_dest4, 0 + vmv.v.i v_dest5, 0 + vmv.v.i v_dest6, 0 + + /* initialize table pointers */ + /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */ + mv x_tbl1, x_tbl + slli t0, x_vec, 5 + add x_tbl2, x_tbl1, t0 + add x_tbl3, x_tbl2, t0 + add x_tbl4, x_tbl3, t0 + add x_tbl5, x_tbl4, t0 + add x_tbl6, x_tbl5, t0 + +.Llooprvv_vl_vects: + /* load source data */ + slli a6, x_vec_i, 3 + add a6,x_src,a6 + ld x_ptr, 0(a6) + add x_ptr,x_ptr,x_pos + + vle8.v v_src, (x_ptr) + + + /* split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load gf_table's */ + vle8.v v_gft1_lo, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft1_hi, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + + vle8.v v_gft2_lo, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + vle8.v v_gft2_hi, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + + vle8.v v_gft3_lo, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + vle8.v v_gft3_hi, (x_tbl3) + addi x_tbl3, 
x_tbl3, 16 + + vle8.v v_gft4_lo, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + vle8.v v_gft4_hi, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + + vle8.v v_gft5_lo, (x_tbl5) + addi x_tbl5, x_tbl5, 16 + vle8.v v_gft5_hi, (x_tbl5) + addi x_tbl5, x_tbl5, 16 + + vle8.v v_gft6_lo, (x_tbl6) + addi x_tbl6, x_tbl6, 16 + vle8.v v_gft6_hi, (x_tbl6) + addi x_tbl6, x_tbl6, 16 + + + /* dest 1 */ + vrgather.vv v26, v_gft1_lo, v_src_lo + vrgather.vv v27, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_dest1, v26 + vxor.vv v_dest1, v_dest1, v27 + + /* dest 2 */ + vrgather.vv v26, v_gft2_lo, v_src_lo + vrgather.vv v27, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_dest2, v26 + vxor.vv v_dest2, v_dest2, v27 + + /* GF multiplication and accumulation for dest3 */ + vrgather.vv v26, v_gft3_lo, v_src_lo + vrgather.vv v27, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_dest3, v26 + vxor.vv v_dest3, v_dest3, v27 + + /* GF multiplication and accumulation for dest4 */ + vrgather.vv v26, v_gft4_lo, v_src_lo + vrgather.vv v27, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_dest4, v26 + vxor.vv v_dest4, v_dest4, v27 + + /* GF multiplication and accumulation for dest5 */ + vrgather.vv v26, v_gft5_lo, v_src_lo + vrgather.vv v27, v_gft5_hi, v_src_hi + vxor.vv v_dest5, v_dest5, v26 + vxor.vv v_dest5, v_dest5, v27 + + /* GF multiplication and accumulation for dest6 */ + vrgather.vv v26, v_gft6_lo, v_src_lo + vrgather.vv v27, v_gft6_hi, v_src_hi + vxor.vv v_dest6, v_dest6, v26 + vxor.vv v_dest6, v_dest6, v27 + + + /* load next source pointer */ + addi x_vec_i, x_vec_i,1 + + /* check if we have processed all vectors */ + blt x_vec_i, x_vec, .Llooprvv_vl_vects + + /* store destination data */ + vse8.v v_dest1, (x_dest1) # x_dest1 v_dest1==v4 + vse8.v v_dest2, (x_dest2) #x_dest2 + vse8.v v_dest3, (x_dest3) #x_dest3 + vse8.v v_dest4, (x_dest4) # x_dest4 + vse8.v v_dest5, (x_dest5) # x_dest5 + vse8.v v_dest6, (x_dest6) # x_dest6 + + add x_dest1,x_dest1, a5 + add x_dest2,x_dest2, a5 + add x_dest3,x_dest3, a5 + add x_dest4,x_dest4, a5 + add x_dest5,x_dest5, a5 + add x_dest6,x_dest6, a5 + + /* increment position */ + add x_pos, x_pos, a5 + j .Llooprvv_vl + +.return_pass: + /* restore callee-saved registers */ + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + addi sp, sp, 64 + + li a0, 0 + ret + +.return_fail: + li a0, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_6vect_mad_rvv.S b/erasure_code/riscv64/gf_6vect_mad_rvv.S new file mode 100644 index 0000000..95d4a66 --- /dev/null +++ b/erasure_code/riscv64/gf_6vect_mad_rvv.S @@ -0,0 +1,241 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
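The multi-destination dot-product kernels exist so that a high-level encode can read each source block once while producing several parity rows. The real routing lives in ec_riscv64_highlevel_func.c, which is not part of this excerpt, so the grouping below is only an illustration of how such a wrapper could fan rows out over the seven-destination and single-destination variants in this patch (prototypes copied from the comments in the assembly sources):

extern void gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
                                  unsigned char **src, unsigned char **dest);
extern void gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls,
                                 unsigned char **src, unsigned char *dest);

/* Illustrative fan-out only, not the shipped routing logic. */
static void
ec_encode_data_rvv_sketch(int len, int k, int rows, unsigned char *g_tbls,
                          unsigned char **data, unsigned char **coding)
{
        /* Emit parity seven rows at a time, then one row at a time. */
        while (rows >= 7) {
                gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding);
                g_tbls += 7 * k * 32;
                coding += 7;
                rows -= 7;
        }
        while (rows > 0) {
                gf_vect_dot_prod_rvv(len, k, g_tbls, data, *coding);
                g_tbls += k * 32;
                coding++;
                rows--;
        }
}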
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_6vect_mad_rvv +.type gf_6vect_mad_rvv, @function + +/* gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + */ +/* arguments */ +#define x_len a0 +#define x_vec a1 +#define x_vec_i a2 +#define x_tbl a3 +#define x_src a4 +#define x_dest a5 + +/* returns */ +#define w_ret a0 + +/* local variables */ +#define x_pos t0 +#define x_dest1 t1 +#define x_dest2 t2 +#define x_dest3 t3 +#define x_dest4 t4 +#define x_dest5 t5 +#define x_dest6 t6 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest1 v4 +#define v_tmp_lo v5 +#define v_tmp_hi v6 +#define v_gft1_lo v7 +#define v_gft1_hi v8 +#define v_gft2_lo v9 +#define v_gft2_hi v10 +#define v_gft3_lo v11 +#define v_gft3_hi v12 +#define v_gft4_lo v13 +#define v_gft4_hi v14 +#define v_gft5_lo v15 +#define v_gft5_hi v16 +#define v_gft6_lo v17 +#define v_gft6_hi v18 +#define v_dest2 v19 +#define v_dest3 v20 +#define v_dest4 v21 +#define v_dest5 v22 +#define v_dest6 v23 + +gf_6vect_mad_rvv: + /* less than 16 bytes, return_fail */ + li t6, 16 + blt x_len, t6, .return_fail + + /* save callee-saved registers */ + addi sp, sp, -16 + sd s8, 0(sp) + + vsetvli a6, x0, e8, m1 + + /* Load table 1 */ + slli s8, x_vec_i, 5 + add x_tbl, x_tbl, s8 + vle8.v v_gft1_lo, (x_tbl) + addi s8, x_tbl, 16 + vle8.v v_gft1_hi, (s8) + + /* Load table 2 */ + slli s8, x_vec, 5 + add x_tbl, x_tbl, s8 + vle8.v v_gft2_lo, (x_tbl) + addi s8, x_tbl, 16 + vle8.v v_gft2_hi, (s8) + + /* Load table 3 */ + slli s8, x_vec, 5 + add x_tbl, x_tbl, s8 + vle8.v v_gft3_lo, (x_tbl) + addi s8, x_tbl, 16 + vle8.v v_gft3_hi, (s8) + + /* Load table 4 */ + slli s8, x_vec, 5 + add x_tbl, x_tbl, s8 + vle8.v v_gft4_lo, (x_tbl) + addi s8, x_tbl, 16 + vle8.v v_gft4_hi, (s8) + + /* Load table 5 */ + slli s8, x_vec, 5 + add x_tbl, x_tbl, s8 + vle8.v v_gft5_lo, (x_tbl) + addi s8, x_tbl, 16 + vle8.v v_gft5_hi, (s8) + + /* Load table 6 */ + slli s8, x_vec, 5 + add x_tbl, x_tbl, s8 + vle8.v v_gft6_lo, (x_tbl) + addi s8, x_tbl, 16 + vle8.v v_gft6_hi, (s8) + + /* Load destination pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + ld x_dest5, 32(x_dest) + ld x_dest6, 40(x_dest) + + li x_pos, 0 + +.Llooprvv_vl: + blt x_pos, x_len, .Lloop_body + j .return_pass + +.Lloop_body: + /* Load source data */ + add a7, x_src, x_pos + vle8.v v_src, (a7) + + /* Split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load dest data */ + add a7, x_dest1, x_pos + vle8.v v_dest1, (a7) + add a7, x_dest2, x_pos + vle8.v v_dest2, (a7) + add a7, 
x_dest3, x_pos + vle8.v v_dest3, (a7) + add a7, x_dest4, x_pos + vle8.v v_dest4, (a7) + add a7, x_dest5, x_pos + vle8.v v_dest5, (a7) + add a7, x_dest6, x_pos + vle8.v v_dest6, (a7) + + /* dest1 */ + vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_tmp_lo, v_dest1 + vxor.vv v_dest1, v_tmp_hi, v_dest1 + + /* dest2 */ + vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_tmp_lo, v_dest2 + vxor.vv v_dest2, v_tmp_hi, v_dest2 + + /* dest3 */ + vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_tmp_lo, v_dest3 + vxor.vv v_dest3, v_tmp_hi, v_dest3 + + /* dest4 */ + vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_tmp_lo, v_dest4 + vxor.vv v_dest4, v_tmp_hi, v_dest4 + + /* dest5 */ + vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi + vxor.vv v_dest5, v_tmp_lo, v_dest5 + vxor.vv v_dest5, v_tmp_hi, v_dest5 + + /* dest6 */ + vrgather.vv v_tmp_lo, v_gft6_lo, v_src_lo + vrgather.vv v_tmp_hi, v_gft6_hi, v_src_hi + vxor.vv v_dest6, v_tmp_lo, v_dest6 + vxor.vv v_dest6, v_tmp_hi, v_dest6 + + /* Store destination data */ + add a7, x_dest1, x_pos + vse8.v v_dest1, (a7) + add a7, x_dest2, x_pos + vse8.v v_dest2, (a7) + add a7, x_dest3, x_pos + vse8.v v_dest3, (a7) + add a7, x_dest4, x_pos + vse8.v v_dest4, (a7) + add a7, x_dest5, x_pos + vse8.v v_dest5, (a7) + add a7, x_dest6, x_pos + vse8.v v_dest6, (a7) + + /* Increment position */ + add x_pos, x_pos, a6 + + j .Llooprvv_vl + +.return_pass: + ld s8, 0(sp) + addi sp, sp, 16 + + li w_ret, 0 + ret + +.return_fail: + li w_ret, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S new file mode 100644 index 0000000..d4cc1d7 --- /dev/null +++ b/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S @@ -0,0 +1,299 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_7vect_dot_prod_rvv +.type gf_7vect_dot_prod_rvv, @function + +/* void gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + */ + +/* arguments */ +#define x_len a0 /* vector length */ +#define x_vec a1 /* number of source vectors (ie. data blocks) */ +#define x_tbl a2 +#define x_src a3 +#define x_dest a4 + +/* local variables */ +#define x_vec_i t1 +#define x_ptr t2 +#define x_pos t3 + +#define x_tbl1 t4 +#define x_tbl2 t5 +#define x_tbl3 t6 +#define x_tbl4 s8 +#define x_tbl5 a6 + +#define x_tbl6 a7 +#define x_tbl7 s0 +#define x_dest1 s1 +#define x_dest2 s2 +#define x_dest3 s3 +#define x_dest4 s4 +#define x_dest5 s5 +#define x_dest6 s6 +#define x_dest7 s7 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 + +#define v_dest1 v4 +#define v_dest2 v5 /* destination 2 */ +#define v_dest3 v6 /* destination 3 */ +#define v_dest4 v7 /* destination 4 */ +#define v_dest5 v8 /* destination 5 */ +#define v_dest6 v9 /* destination 6 */ +#define v_dest7 v10 /* destination 7 */ + +#define v_gft1_lo v11 +#define v_gft1_hi v12 +#define v_gft2_lo v13 /* GF table 2 low */ +#define v_gft2_hi v14 /* GF table 2 high */ +#define v_gft3_lo v15 /* GF table 3 low */ +#define v_gft3_hi v16 /* GF table 3 high */ +#define v_gft4_lo v17 /* GF table 4 low */ +#define v_gft4_hi v18 /* GF table 4 high */ +#define v_gft5_lo v19 /* GF table 5 low */ +#define v_gft5_hi v20 /* GF table 5 high */ +#define v_gft6_lo v21 /* GF table 6 low */ +#define v_gft6_hi v22 /* GF table 6 high */ +#define v_gft7_lo v23 +#define v_gft7_hi v24 + + +gf_7vect_dot_prod_rvv: + /* less than 16 bytes, return_fail */ + li t0, 16 + blt x_len, t0, .return_fail + + /* save callee-saved registers */ + addi sp, sp, -80 + sd s0, 0(sp) + sd s1, 8(sp) + sd s2, 16(sp) + sd s3, 24(sp) + sd s4, 32(sp) + sd s5, 40(sp) + sd s6, 48(sp) + sd s7, 56(sp) + sd s8, 64(sp) + + vsetvli t0, x0, e8, m1 + + /* initialize position */ + li x_pos, 0 + + /* load destination pointers */ + ld x_dest1, 0(x_dest) + ld x_dest2, 8(x_dest) + ld x_dest3, 16(x_dest) + ld x_dest4, 24(x_dest) + ld x_dest5, 32(x_dest) + ld x_dest6, 40(x_dest) + ld x_dest7, 48(x_dest) + +/* Loop 1: x_len, vector length */ +.Llooprvv_vl: + /* check if we have processed all elements */ + bge x_pos, x_len, .return_pass + + /* initialize vector loop counter */ + li x_vec_i, 0 + + /* load source pointer */ + ld x_ptr, 0(x_src) + + /* clear destination vectors */ + vmv.v.i v_dest1, 0 + vmv.v.i v_dest2, 0 + vmv.v.i v_dest3, 0 + vmv.v.i v_dest4, 0 + vmv.v.i v_dest5, 0 + vmv.v.i v_dest6, 0 + vmv.v.i v_dest7, 0 + + /* reset table pointers */ + mv x_tbl1, x_tbl + mv x_tbl1, x_tbl + slli a5, x_vec, 5 + add x_tbl2, x_tbl1, a5 + add x_tbl3, x_tbl2, a5 + add x_tbl4, x_tbl3, a5 + add x_tbl5, x_tbl4, a5 + add x_tbl6, x_tbl5, a5 + add x_tbl7, x_tbl6, a5 + +.Llooprvv_vl_vects: + /* load source data 
*/ + slli a5, x_vec_i, 3 + add a5,x_src,a5 + ld x_ptr, 0(a5) + add x_ptr,x_ptr,x_pos + + vle8.v v_src, (x_ptr) + + /* split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load gf_table's */ + vle8.v v_gft1_lo, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + vle8.v v_gft1_hi, (x_tbl1) + addi x_tbl1, x_tbl1, 16 + + vle8.v v_gft2_lo, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + vle8.v v_gft2_hi, (x_tbl2) + addi x_tbl2, x_tbl2, 16 + + vle8.v v_gft3_lo, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + vle8.v v_gft3_hi, (x_tbl3) + addi x_tbl3, x_tbl3, 16 + + vle8.v v_gft4_lo, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + vle8.v v_gft4_hi, (x_tbl4) + addi x_tbl4, x_tbl4, 16 + + vle8.v v_gft5_lo, (x_tbl5) + addi x_tbl5, x_tbl5, 16 + vle8.v v_gft5_hi, (x_tbl5) + addi x_tbl5, x_tbl5, 16 + + vle8.v v_gft6_lo, (x_tbl6) + addi x_tbl6, x_tbl6, 16 + vle8.v v_gft6_hi, (x_tbl6) + addi x_tbl6, x_tbl6, 16 + + vle8.v v_gft7_lo, (x_tbl7) + addi x_tbl7, x_tbl7, 16 + vle8.v v_gft7_hi, (x_tbl7) + addi x_tbl7, x_tbl7, 16 + + + /* dest 1 */ + vrgather.vv v26, v_gft1_lo, v_src_lo + vrgather.vv v27, v_gft1_hi, v_src_hi + vxor.vv v_dest1, v_dest1, v26 + vxor.vv v_dest1, v_dest1, v27 + + /* dest 2 */ + vrgather.vv v26, v_gft2_lo, v_src_lo + vrgather.vv v27, v_gft2_hi, v_src_hi + vxor.vv v_dest2, v_dest2, v26 + vxor.vv v_dest2, v_dest2, v27 + + /* GF multiplication and accumulation for dest3 */ + vrgather.vv v26, v_gft3_lo, v_src_lo + vrgather.vv v27, v_gft3_hi, v_src_hi + vxor.vv v_dest3, v_dest3, v26 + vxor.vv v_dest3, v_dest3, v27 + + /* GF multiplication and accumulation for dest4 */ + vrgather.vv v26, v_gft4_lo, v_src_lo + vrgather.vv v27, v_gft4_hi, v_src_hi + vxor.vv v_dest4, v_dest4, v26 + vxor.vv v_dest4, v_dest4, v27 + + /* GF multiplication and accumulation for dest5 */ + vrgather.vv v26, v_gft5_lo, v_src_lo + vrgather.vv v27, v_gft5_hi, v_src_hi + vxor.vv v_dest5, v_dest5, v26 + vxor.vv v_dest5, v_dest5, v27 + + /* GF multiplication and accumulation for dest6 */ + vrgather.vv v26, v_gft6_lo, v_src_lo + vrgather.vv v27, v_gft6_hi, v_src_hi + vxor.vv v_dest6, v_dest6, v26 + vxor.vv v_dest6, v_dest6, v27 + + + /* GF multiplication and accumulation for dest7 */ + vrgather.vv v26, v_gft7_lo, v_src_lo + vrgather.vv v27, v_gft7_hi, v_src_hi + vxor.vv v_dest7, v_dest7, v26 + vxor.vv v_dest7, v_dest7, v27 + + /* increment x_vec_i */ + addi x_vec_i, x_vec_i, 1 + blt x_vec_i, x_vec, .Llooprvv_vl_vects + + /* Store results to destination */ + vse8.v v_dest1, (x_dest1) + vse8.v v_dest2, (x_dest2) + vse8.v v_dest3, (x_dest3) + vse8.v v_dest4, (x_dest4) + vse8.v v_dest5, (x_dest5) + vse8.v v_dest6, (x_dest6) + vse8.v v_dest7, (x_dest7) + + add x_dest1,x_dest1, t0 + add x_dest2,x_dest2, t0 + add x_dest3,x_dest3, t0 + add x_dest4,x_dest4, t0 + add x_dest5,x_dest5, t0 + add x_dest6,x_dest6, t0 + add x_dest7,x_dest7, t0 + + /* increment one vector length */ + add x_pos, x_pos, t0 + j .Llooprvv_vl + +.return_pass: + /* Restore callee-saved registers */ + ld s0, 0(sp) + ld s1, 8(sp) + ld s2, 16(sp) + ld s3, 24(sp) + ld s4, 32(sp) + ld s5, 40(sp) + ld s6, 48(sp) + ld s7, 56(sp) + ld s8, 64(sp) + addi sp, sp, 80 + + /* Return success */ + li a0, 0 + ret + +.return_fail: + li a0, 1 # return fail + ret + +#endif diff --git a/erasure_code/riscv64/gf_vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_vect_dot_prod_rvv.S new file mode 100644 index 0000000..471b65e --- /dev/null +++ b/erasure_code/riscv64/gf_vect_dot_prod_rvv.S @@ -0,0 +1,136 @@ +################################################################## +# Copyright (c) 2025 
sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + + +# RISC-V RVV implementation of gf_vect_dot_prod_rvv + +# Function: gf_vect_dot_prod_rvv +# Arguments: +# a0: len (vector length) +# a1: vlen (number of source vectors) +# a2: gftbls (pointer to GF(2^8) multiplication tables) +# a3: src (pointer to array of source vector pointers) +# a4: dest (pointer to destination vector) + +# Local variables: +# t0: vec_i (source vector index) +# t1: ptr (pointer to current source vector) +# t2: pos (current position in vector) +# t3: tbl1 (pointer to current GF table) + +# Vector registers: +# v0: z_mask0f (mask for low 4 bits) +# v1: z_src (source vector data) +# v2: z_src_lo (low 4 bits of source vector) +# v3: z_src_hi (high 4 bits of source vector) +# v4: z_dest (destination vector) +# v5: z_gft1_lo (low 8 bits of GF table) +# v6: z_gft1_hi (high 8 bits of GF table) + +#if HAVE_RVV +.global gf_vect_dot_prod_rvv +.type gf_vect_dot_prod_rvv, @function + +gf_vect_dot_prod_rvv: + # Check if len < 16 + li t4, 16 + blt a0, t4, .return_fail + + vsetvli t5, zero, e8, m1 # Set vector length to maximum + + # Initialize pos = 0 + li t2, 0 + + # Multiply vlen by 8 (each pointer is 8 bytes) + slli a1, a1, 3 + +.Llooprvv_vl: + # Check if pos >= len + bge t2, a0, .return_pass + + # Clear z_dest + vmv.v.i v4, 0 + + # Initialize vec_i = 0 + li t0, 0 + + # Reset tbl1 to gftbls + mv t3, a2 + +.Llooprvv_vl_vects: + # Load src[vec_i] into ptr + add t6, a3, t0 # src + vec_i * 8 + ld t1, 0(t6) # Load pointer to current source vector + + # Load src data into z_src + add t1, t1, t2 # add offset + vle8.v v1, (t1) # Load source vector into v1 + + # Increment vec_i + addi t0, t0, 8 + + # Load GF table (low and high) + vle8.v v5, (t3) # Load low 8 bits of GF table + addi t3, t3, 16 # Move to next GF table entry + vle8.v v6, (t3) # Load high 8 bits of GF table + addi t3, t3, 16 # Move to next GF table entry + + # Split src into low and high 4 bits + vand.vi v2, v1, 0x0F # z_src_lo = z_src & z_mask0f + 
vsrl.vi v3, v1, 4 # z_src_hi = z_src >> 4 + + # GF multiplication (table lookup) + vrgather.vv v8, v5, v2 # z_gft1_lo = GF table lookup for low 4 bits + vrgather.vv v9, v6, v3 # z_gft1_hi = GF table lookup for high 4 bits + + # GF addition (XOR) + vxor.vv v4, v4, v8 # z_dest ^= z_gft1_lo + vxor.vv v4, v4, v9 # z_dest ^= z_gft1_hi + + # Check if vec_i < vlen + blt t0, a1, .Llooprvv_vl_vects + + # Store z_dest to dest[pos] + vse8.v v4, (a4) # Store destination vector + add a4, a4, t5 # Move dest pointer to next position + + # Increment pos + add t2, t2, t5 # pos += 16 (vector length) + + j .Llooprvv_vl + +.return_pass: + li a0, 0 # Return 0 (success) + ret + +.return_fail: + li a0, 1 # Return 1 (failure) + ret + +#endif diff --git a/erasure_code/riscv64/gf_vect_mad_rvv.S b/erasure_code/riscv64/gf_vect_mad_rvv.S new file mode 100644 index 0000000..2c9aeb8 --- /dev/null +++ b/erasure_code/riscv64/gf_vect_mad_rvv.S @@ -0,0 +1,119 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
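For comparison with gf_vect_dot_prod_rvv above, the same computation can be written with the RVV C intrinsics and a per-iteration vsetvl, which removes the padded-length assumption by shrinking vl on the last pass. This is a sketch only and is not part of this patch; the __riscv_* spellings follow the ratified RVV intrinsics as shipped by recent GCC/Clang in <riscv_vector.h>, and like the assembly it assumes VLEN >= 128 so a 16-byte nibble table fits in one e8/m1 register:

#include <riscv_vector.h>
#include <stddef.h>

/* Tail-safe sketch of the single-destination dot product.  Table layout as
 * above: 32 bytes per source, low-nibble table first. */
static void
gf_vect_dot_prod_intrin(size_t len, int vlen, const unsigned char *gftbls,
                        unsigned char **src, unsigned char *dest)
{
        for (size_t pos = 0; pos < len;) {
                size_t vl = __riscv_vsetvl_e8m1(len - pos);
                vuint8m1_t acc = __riscv_vmv_v_x_u8m1(0, vl);

                for (int v = 0; v < vlen; v++) {
                        const unsigned char *tbl = gftbls + 32 * (size_t) v;
                        vuint8m1_t s = __riscv_vle8_v_u8m1(src[v] + pos, vl);
                        vuint8m1_t lo = __riscv_vand_vx_u8m1(s, 0x0F, vl);
                        vuint8m1_t hi = __riscv_vsrl_vx_u8m1(s, 4, vl);
                        /* Table loads use vl = 16 (needs VLMAX >= 16). */
                        vuint8m1_t tlo = __riscv_vle8_v_u8m1(tbl, 16);
                        vuint8m1_t thi = __riscv_vle8_v_u8m1(tbl + 16, 16);

                        acc = __riscv_vxor_vv_u8m1(acc,
                                __riscv_vrgather_vv_u8m1(tlo, lo, vl), vl);
                        acc = __riscv_vxor_vv_u8m1(acc,
                                __riscv_vrgather_vv_u8m1(thi, hi, vl), vl);
                }
                __riscv_vse8_v_u8m1(dest + pos, acc, vl);
                pos += vl;
        }
}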
+######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_vect_mad_rvv +.type gf_vect_mad_rvv, @function + +/* gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char *dest); + */ +/* arguments */ +#define x_len a0 +#define x_vec_i a2 +#define x_tbl a3 +#define x_src a4 +#define x_dest a5 + +/* returns */ +#define w_ret a0 + +/* local variables */ +#define x_pos t0 + +/* vectors */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest v4 +#define v_tmp1_lo v5 +#define v_tmp1_hi v6 +#define v_gft1_lo v7 +#define v_gft1_hi v8 + +gf_vect_mad_rvv: + /* less than 16 bytes, return_fail */ + li t1, 16 + blt x_len, t1, .return_fail + + vsetvli t2, x0, e8, m1 + + /* x_tbl += x_vec_i * 2^5 */ + slli t1, x_vec_i, 5 + add x_tbl, x_tbl, t1 + + /* Load gft1_lo and gft1_hi */ + vle8.v v_gft1_lo, (x_tbl) + addi t1, x_tbl, 16 + vle8.v v_gft1_hi, (t1) + + li x_pos, 0 + +.Lloop_rvv_vl: + /* load src data */ + vle8.v v_src, (x_src) + + /* split 4-bit lo; 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* load dest data */ + vle8.v v_dest, (x_dest) + + /* table indexing, ie. gf(2^8) multiplication */ + /* RISC-V RVV does not have tbl instruction, use vrgather.vv */ + vrgather.vv v_tmp1_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp1_hi, v_gft1_hi, v_src_hi + + /* exclusive or, ie. gf(2^8) add */ + vxor.vv v_dest, v_tmp1_lo, v_dest + vxor.vv v_dest, v_tmp1_hi, v_dest + + /* store dest data */ + vse8.v v_dest, (x_dest) + + /* increment one vector length */ + add x_pos, x_pos, t2 + add x_src, x_src, t2 + add x_dest, x_dest, t2 + + blt x_pos, x_len, .Lloop_rvv_vl + +.return_pass: + li w_ret, 0 + ret + +.return_fail: + li w_ret, 1 + ret + +#endif diff --git a/erasure_code/riscv64/gf_vect_mul_rvv.S b/erasure_code/riscv64/gf_vect_mul_rvv.S new file mode 100644 index 0000000..92a8982 --- /dev/null +++ b/erasure_code/riscv64/gf_vect_mul_rvv.S @@ -0,0 +1,114 @@ +################################################################## +# Copyright (c) 2025 sanechips Technologies Co., Ltd. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of sanechips Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#if HAVE_RVV +.text +.align 2 + +.global gf_vect_mul_rvv +.type gf_vect_mul_rvv, @function + +/* Function arguments: + * a0: len - Length of vector in bytes. + * a1: gftbl - Pointer to 32-byte array of pre-calculated constants. + * a2: src - Pointer to source data array. + * a3: dest - Pointer to destination data array. + * Returns: + * a0: 0 for success, 1 for failure. + */ + +/* Local variables */ +#define x_pos t0 +#define x_tmp t1 +#define x_ptr t2 +#define x_len a0 +#define x_tbl a1 +#define x_src a2 +#define x_dest a3 + +/* Vector registers */ +#define v_src v1 +#define v_src_lo v2 +#define v_src_hi v3 +#define v_dest v4 +#define v_tmp1_lo v5 +#define v_tmp1_hi v6 +#define v_gft1_lo v7 +#define v_gft1_hi v8 + +gf_vect_mul_rvv: + /* Check if len is 32 bytes */ + andi x_tmp, x_len, 0x1F + bnez x_tmp, .return_fail + + vsetvli t6, x0, e8, m1 + + /* Load pre-calculated constants into v_gft1_lo and v_gft1_hi */ + vle8.v v_gft1_lo, (x_tbl) + addi t3, x_tbl, 16 + vle8.v v_gft1_hi, (t3) + + /* Initialize position counter */ + li x_pos, 0 + +.Llooprvv_vl: + /* Load source data into v_src */ + add x_ptr,x_src,x_pos + vle8.v v_src, (x_ptr) + + /* Split 4-bit lo and 4-bit hi */ + vand.vi v_src_lo, v_src, 0x0F + vsrl.vi v_src_hi, v_src, 4 + + /* Table lookup (GF multiplication) */ + vrgather.vv v_tmp1_lo, v_gft1_lo, v_src_lo + vrgather.vv v_tmp1_hi, v_gft1_hi, v_src_hi + + /* XOR (GF addition) */ + vxor.vv v_dest, v_tmp1_hi, v_tmp1_lo + + /* Store result to destination */ + vse8.v v_dest, (x_dest) + + /* Increment position counter */ + add x_pos, x_pos, t6 + add x_dest, x_dest, t6 + + /* Check if we have processed all bytes */ + blt x_pos, x_len, .Llooprvv_vl + +.return_pass: + li a0, 0 + ret + +.return_fail: + li a0, 1 + ret + +#endif
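gf_vect_mul_rvv mirrors the existing gf_vect_mul variants: the andi/bnez check at the top rejects any length that is not a multiple of 32, and the 32-byte constant table is the one produced by the existing isa-l helper gf_vect_mul_init(). A minimal usage sketch that links the kernel directly (the extern prototypes below are taken from the argument comments in the sources above):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern void gf_vect_mul_init(unsigned char c, unsigned char *gftbl);
extern int gf_vect_mul_rvv(int len, unsigned char *gftbl,
                           unsigned char *src, unsigned char *dest);

int
main(void)
{
        int len = 4096;                 /* must be a multiple of 32 */
        unsigned char gftbl[32];
        unsigned char *src = malloc(len);
        unsigned char *dest = malloc(len);

        if (!src || !dest)
                return 1;
        memset(src, 0xAB, len);

        gf_vect_mul_init(2, gftbl);     /* lo/hi tables for multiply-by-2 */
        if (gf_vect_mul_rvv(len, gftbl, src, dest) != 0) {
                fprintf(stderr, "len not a multiple of 32\n");
                return 1;
        }
        printf("dest[0] = 0x%02x\n", dest[0]);

        free(src);
        free(dest);
        return 0;
}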