mirror of
				https://github.com/intel/isa-l.git
				synced 2025-10-28 19:51:56 +01:00 
			
		
		
		
	erasure_code: optimize RVV implementation
The ISA-L EC code has been rewritten using RVV vector instructions and the minimum multiplication table, resulting in a performance improvement of over 10 times compared to the existing implementation.

Signed-off-by: Shuo Lv <lv.shuo@sanechips.com.cn>
This commit is contained in:
		| @@ -152,7 +152,7 @@ v2.32 | |||||||
|   - Added new RVV xor_gen, pq_gen implementations. |   - Added new RVV xor_gen, pq_gen implementations. | ||||||
|  |  | ||||||
| * Erasure coding improvements: | * Erasure coding improvements: | ||||||
|   - Added new RVV ec_encode_data, gf_vect_dot_prod, gf_vect_mul implementations. |   - Added new RVV ec_encode_data, ec_encode_data_update, gf_vect_mad, gf_vect_dot_prod, gf_vect_mul implementations. | ||||||
|  |  | ||||||
| * Zero-memory detection improvements: | * Zero-memory detection improvements: | ||||||
|   - Added new RVV implementations. |   - Added new RVV implementations. | ||||||
|   | |||||||
| @@ -67,8 +67,13 @@ case "${CPU}" in | |||||||
| 			])], | 			])], | ||||||
| 			[AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions]) | 			[AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions]) | ||||||
| 			AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes], | 			AM_CONDITIONAL([HAVE_RVV], [true]) rvv=yes], | ||||||
| 			[AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no] | 			[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions]) | ||||||
|  | 			AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no] | ||||||
| 		) | 		) | ||||||
|  | 		if test "x$rvv" = "xyes"; then | ||||||
|  | 		  CFLAGS+=" -march=rv64gcv" | ||||||
|  | 		  CCASFLAGS+=" -march=rv64gcv" | ||||||
|  | 		fi | ||||||
| 		AC_MSG_RESULT([$rvv]) | 		AC_MSG_RESULT([$rvv]) | ||||||
| 		;; | 		;; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -50,6 +50,9 @@ | |||||||
| #define EFENCE_TEST_MIN_SIZE 16 | #define EFENCE_TEST_MIN_SIZE 16 | ||||||
| #define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100 | #define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100 | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | #define EC_ALIGNED_ADDR | ||||||
|  | #endif | ||||||
| #ifdef EC_ALIGNED_ADDR | #ifdef EC_ALIGNED_ADDR | ||||||
| // Define power of 2 range to check ptr, len alignment | // Define power of 2 range to check ptr, len alignment | ||||||
| #define PTR_ALIGN_CHK_B 0 | #define PTR_ALIGN_CHK_B 0 | ||||||
|   | |||||||
| @@ -35,8 +35,13 @@ | |||||||
| #include "test.h" | #include "test.h" | ||||||
|  |  | ||||||
| #ifndef ALIGN_SIZE | #ifndef ALIGN_SIZE | ||||||
|  | #if HAVE_RVV | ||||||
|  | #define EC_ALIGNED_ADDR | ||||||
|  | #define ALIGN_SIZE 32 | ||||||
|  | #else | ||||||
| #define ALIGN_SIZE 16 | #define ALIGN_SIZE 16 | ||||||
| #endif | #endif | ||||||
|  | #endif | ||||||
|  |  | ||||||
| // By default, test multibinary version | // By default, test multibinary version | ||||||
| #ifndef FUNCTION_UNDER_TEST | #ifndef FUNCTION_UNDER_TEST | ||||||
|   | |||||||
| @@ -1,3 +1,31 @@ | |||||||
|  | ######################################################################### | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
| ######################################################################## | ######################################################################## | ||||||
| #  Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). | #  Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). | ||||||
| # | # | ||||||
| @@ -28,8 +56,20 @@ | |||||||
| ######################################################################## | ######################################################################## | ||||||
|  |  | ||||||
| lsrc_riscv64 += \ | lsrc_riscv64 += \ | ||||||
| 	erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c \ | 		erasure_code/riscv64/ec_riscv64_dispatcher.c \ | ||||||
| 		erasure_code/riscv64/ec_multibinary_riscv64.S \ | 		erasure_code/riscv64/ec_multibinary_riscv64.S \ | ||||||
| 	erasure_code/riscv64/ec_gf_vect_mul_rvv.S \ | 		erasure_code/riscv64/ec_riscv64_highlevel_func.c \ | ||||||
| 	erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S \ | 		erasure_code/riscv64/gf_vect_dot_prod_rvv.S \ | ||||||
| 	erasure_code/riscv64/ec_encode_data_rvv.S | 		erasure_code/riscv64/gf_2vect_dot_prod_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_3vect_dot_prod_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_4vect_dot_prod_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_5vect_dot_prod_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_6vect_dot_prod_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_7vect_dot_prod_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_vect_mad_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_2vect_mad_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_3vect_mad_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_4vect_mad_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_5vect_mad_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_6vect_mad_rvv.S \ | ||||||
|  | 		erasure_code/riscv64/gf_vect_mul_rvv.S | ||||||
|   | |||||||
| @@ -1,154 +0,0 @@ | |||||||
| /********************************************************************** |  | ||||||
|   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). |  | ||||||
|  |  | ||||||
|   Redistribution and use in source and binary forms, with or without |  | ||||||
|   modification, are permitted provided that the following conditions |  | ||||||
|   are met: |  | ||||||
|     * Redistributions of source code must retain the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer. |  | ||||||
|     * Redistributions in binary form must reproduce the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer in |  | ||||||
|       the documentation and/or other materials provided with the |  | ||||||
|       distribution. |  | ||||||
|     * Neither the name of ISCAS nor the names of its |  | ||||||
|       contributors may be used to endorse or promote products derived |  | ||||||
|       from this software without specific prior written permission. |  | ||||||
|  |  | ||||||
|   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |  | ||||||
|   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |  | ||||||
|   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |  | ||||||
|   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |  | ||||||
|   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |  | ||||||
|   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |  | ||||||
|   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |  | ||||||
|   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |  | ||||||
|   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |  | ||||||
| **********************************************************************/ |  | ||||||
| #if HAVE_RVV |  | ||||||
| #include "ec_table.S" |  | ||||||
| .option         arch, +v |  | ||||||
| .global         ec_encode_data_rvv |  | ||||||
| .type           ec_encode_data_rvv, %function |  | ||||||
| ec_encode_data_rvv: |  | ||||||
|   blez          a2, 3f |  | ||||||
|   blez          a0, 3f |  | ||||||
|  |  | ||||||
|   lla           t0, gff_base |  | ||||||
|   lla           t1, gflog_base |  | ||||||
|   addi          a3, a3, 1 |  | ||||||
|   vsetvli       zero, a1, e8, mf2, ta, ma |  | ||||||
|   vmv.v.i       v20, 0 |  | ||||||
|   li            t3, 32 |  | ||||||
|   mv            a6, a0                   // backup len |  | ||||||
|   mv            a7, a5                   // backup dest |  | ||||||
|  |  | ||||||
|   csrr          t5, vlenb                // vlen/8 |  | ||||||
|   srli          t5, t5, 1                // mf2: vlen/16 |  | ||||||
|   blt           t5, a1, slow             // vlen/16(hardware) < vlen(software) |  | ||||||
|  |  | ||||||
| 2: |  | ||||||
|   li            t2, 0                    // l |  | ||||||
|   vlse8.v       v24, (a3), t3            // v[j*32+1] |  | ||||||
|   vmsne.vi      v12, v24, 0              // if v == 0 |  | ||||||
|   vluxei8.v     v24, (t1), v24           // gflag_base[v[]] |  | ||||||
|   ld            a5, (a5) |  | ||||||
|  |  | ||||||
| 1: |  | ||||||
|   vsetvli       zero, zero, e8, mf2, ta, ma |  | ||||||
|   vle64.v       v16, (a4)                // src[j] |  | ||||||
|   vluxei64.v    v16, (t2), v16           // src[j][i] |  | ||||||
|   vmsne.vi      v0, v16, 0               // if src == 0 |  | ||||||
|   vmand.mm      v0, v0, v12              // if src == 0 || v == 0 |  | ||||||
|   vluxei8.v     v16, (t1), v16, v0.t     // gflag_base[src[j][i]] |  | ||||||
|   vwaddu.vv     v8, v16, v24, v0.t |  | ||||||
|   vmv.v.i       v16, 0 |  | ||||||
|   vsetvli       zero, zero, e8, mf2, ta, mu |  | ||||||
|   vluxei16.v    v16, (t0), v8, v0.t      // gff_base[i] |  | ||||||
|   vxor.vv       v20, v16, v20 |  | ||||||
|   vmv.s.x       v8, zero |  | ||||||
|   vredxor.vs    v8, v20, v8 |  | ||||||
|   vmv.x.s       t5, v8 |  | ||||||
|   addi          a0, a0, -1               // len |  | ||||||
|   sb            t5, (a5)                 // dest[0][i] |  | ||||||
|   addi          t2, t2, 1                // src[j][i] |  | ||||||
|   vmv.v.i       v20, 0 |  | ||||||
|   addi          a5, a5, 1                // dest[i] |  | ||||||
|   bnez          a0, 1b |  | ||||||
|  |  | ||||||
|   addi          a2, a2, -1               // l(dests) |  | ||||||
|   addi          a7, a7, 8 |  | ||||||
|   mv            a0, a6                   // restore len |  | ||||||
|   mv            a5, a7                   // update unsigned char **dest |  | ||||||
|   slli          t5, a1, 5                // += vlen * 32 |  | ||||||
|   add           a3, a3, t5 |  | ||||||
|   bnez          a2, 2b |  | ||||||
|  |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| slow: |  | ||||||
|   addi          sp, sp, -16 |  | ||||||
|   sd            s2, 0(sp) |  | ||||||
|   sd            s3, 8(sp) |  | ||||||
|   mv            s3, a4                   // src |  | ||||||
|   mv            s2, a3                   // v |  | ||||||
|   mv            t4, a1                   // backup vlen |  | ||||||
|  |  | ||||||
| 2: |  | ||||||
|   li            t2, 0                    // i < len |  | ||||||
|   ld            a5, (a5) |  | ||||||
|  |  | ||||||
| 1: |  | ||||||
|   vsetvli       t6, a1, e8, mf2, ta, ma |  | ||||||
|   vle64.v       v16, (a4)                // src[j] |  | ||||||
|   vluxei64.v    v16, (t2), v16           // src[j][i] |  | ||||||
|   vlse8.v       v24, (a3), t3            // v[j*32+1] |  | ||||||
|   vmsne.vi      v12, v24, 0              // if v == 0 |  | ||||||
|   vmsne.vi      v0, v16, 0               // if src == 0 |  | ||||||
|   vmand.mm      v0, v0, v12              // if src == 0 || v == 0 |  | ||||||
|   vluxei8.v     v16, (t1), v16, v0.t     // gflag_base[src[j][i]] |  | ||||||
|   vluxei8.v     v24, (t1), v24           // gflag_base[v[]] |  | ||||||
|   vwaddu.vv     v8, v16, v24, v0.t |  | ||||||
|   vmv.v.i       v16, 0 |  | ||||||
|   vsetvli       zero, zero, e8, mf2, ta, mu |  | ||||||
|   vluxei16.v    v16, (t0), v8, v0.t      // gff_base[i] |  | ||||||
|   vxor.vv       v20, v16, v20 |  | ||||||
|   sub           a1, a1, t6 |  | ||||||
|   slli          t5, t6, 5 |  | ||||||
|   add           a3, a3, t5               // v += 32 * vlen |  | ||||||
|   slli          t5, t6, 3 |  | ||||||
|   add           a4, a4, t5               // src += 8 * vlen |  | ||||||
|   bnez          a1, 1b                   // for (j = 0; j < vlen; j++) |  | ||||||
|  |  | ||||||
|   vsetvli       zero, t4, e8, mf2, ta, ma |  | ||||||
|   vmv.s.x       v8, zero |  | ||||||
|   vredxor.vs    v8, v20, v8 |  | ||||||
|   vmv.x.s       t5, v8 |  | ||||||
|   addi          a0, a0, -1               // len |  | ||||||
|   sb            t5, (a5)                 // dest[0][i] |  | ||||||
|   addi          t2, t2, 1                // src[j][i] |  | ||||||
|   vmv.v.i       v20, 0 |  | ||||||
|   mv            a1, t4                   // restore vlen |  | ||||||
|   mv            a3, s2                   // restore v |  | ||||||
|   mv            a4, s3                   // restore src |  | ||||||
|   addi          a5, a5, 1                // dest[i] |  | ||||||
|   bnez          a0, 1b                   // for (i = 0; i < len; i++) |  | ||||||
|  |  | ||||||
|   addi          a2, a2, -1               // l(dests) |  | ||||||
|   addi          a7, a7, 8                // for (l = 0; l < dests; l++) |  | ||||||
|   mv            a0, a6                   // restore len |  | ||||||
|   mv            a5, a7 |  | ||||||
|   slli          t5, t4, 5 |  | ||||||
|   add           a3, a3, t5               // v += vlen * 32 |  | ||||||
|   mv            s2, a3 |  | ||||||
|   bnez          a2, 2b                   // for (l = 0; l < dests; l++) { |  | ||||||
|  |  | ||||||
|   ld            s2, 0(sp) |  | ||||||
|   ld            s3, 8(sp) |  | ||||||
|   addi          sp, sp, 16 |  | ||||||
|  |  | ||||||
| 3: |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -1,120 +0,0 @@ | |||||||
| /********************************************************************** |  | ||||||
|   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). |  | ||||||
|  |  | ||||||
|   Redistribution and use in source and binary forms, with or without |  | ||||||
|   modification, are permitted provided that the following conditions |  | ||||||
|   are met: |  | ||||||
|     * Redistributions of source code must retain the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer. |  | ||||||
|     * Redistributions in binary form must reproduce the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer in |  | ||||||
|       the documentation and/or other materials provided with the |  | ||||||
|       distribution. |  | ||||||
|     * Neither the name of ISCAS nor the names of its |  | ||||||
|       contributors may be used to endorse or promote products derived |  | ||||||
|       from this software without specific prior written permission. |  | ||||||
|  |  | ||||||
|   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |  | ||||||
|   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |  | ||||||
|   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |  | ||||||
|   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |  | ||||||
|   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |  | ||||||
|   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |  | ||||||
|   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |  | ||||||
|   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |  | ||||||
|   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |  | ||||||
| **********************************************************************/ |  | ||||||
| #if HAVE_RVV |  | ||||||
| #include "ec_table.S" |  | ||||||
| .option         arch, +v |  | ||||||
| .global         gf_vect_dot_prod_rvv |  | ||||||
| .type           gf_vect_dot_prod_rvv, %function |  | ||||||
| gf_vect_dot_prod_rvv: |  | ||||||
|   blez          a0, 3f |  | ||||||
|  |  | ||||||
|   lla           t0, gff_base |  | ||||||
|   lla           t1, gflog_base |  | ||||||
|   addi          a2, a2, 1 |  | ||||||
|   vsetvli       zero, a1, e8, mf2, ta, ma |  | ||||||
|   vmv.v.i       v20, 0 |  | ||||||
|   li            t2, 0 |  | ||||||
|   li            t3, 32 |  | ||||||
|   csrr          t5, vlenb                // vlen/8 |  | ||||||
|   srli          t5, t5, 1                // mf2: vlen/16 |  | ||||||
|   blt           t5, a1, slow             // vlen/16(hardware) < vlen(software) |  | ||||||
|  |  | ||||||
|   vlse8.v       v24, (a2), t3            // v[j*32+1] |  | ||||||
|   vmsne.vi      v12, v24, 0              // if v == 0 |  | ||||||
|   vluxei8.v     v24, (t1), v24           // gflag_base[v[]] |  | ||||||
|  |  | ||||||
| 1: |  | ||||||
|   vsetvli       zero, zero, e8, mf2, ta, ma |  | ||||||
|   vle64.v       v16, (a3)                // src[j] |  | ||||||
|   vluxei64.v    v16, (t2), v16           // src[j][i] |  | ||||||
|   vmsne.vi      v0, v16, 0               // if src == 0 |  | ||||||
|   vmand.mm      v0, v0, v12              // if src == 0 || v == 0 |  | ||||||
|   vluxei8.v     v16, (t1), v16, v0.t     // gflag_base[src[j][i]] |  | ||||||
|   vwaddu.vv     v8, v16, v24, v0.t |  | ||||||
|   vmv.v.i       v16, 0 |  | ||||||
|   vsetvli       zero, zero, e8, mf2, ta, mu |  | ||||||
|   vluxei16.v    v16, (t0), v8, v0.t      // gff_base[i] |  | ||||||
|   vxor.vv       v20, v16, v20 |  | ||||||
|  |  | ||||||
|   vmv.s.x       v8, zero |  | ||||||
|   vredxor.vs    v8, v20, v8 |  | ||||||
|   vmv.x.s       t5, v8 |  | ||||||
|   addi          a0, a0, -1               // len |  | ||||||
|   sb            t5, (a4) |  | ||||||
|   addi          t2, t2, 1                // src[j][i] |  | ||||||
|   vmv.v.i       v20, 0 |  | ||||||
|   addi          a4, a4, 1                // dest[i] |  | ||||||
|   bnez          a0, 1b |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| slow: |  | ||||||
|   mv            a7, a3                   // src |  | ||||||
|   mv            a6, a2                   // v |  | ||||||
|   mv            t4, a1                   // vlen |  | ||||||
|  |  | ||||||
| 1: |  | ||||||
|   vsetvli       t6, a1, e8, mf2, ta, ma |  | ||||||
|   vle64.v       v16, (a3) |  | ||||||
|   vluxei64.v    v16, (t2), v16           // src[j][i] |  | ||||||
|   vlse8.v       v24, (a2), t3            // v[j*32+1] |  | ||||||
|   vmsne.vi      v0, v16, 0               // if src == 0 |  | ||||||
|   vmsne.vi      v12, v24, 0              // if v == 0 |  | ||||||
|   vmand.mm      v0, v0, v12 |  | ||||||
|   vluxei8.v     v16, (t1), v16, v0.t     // gflag_base[src[j][i]] |  | ||||||
|   vluxei8.v     v24, (t1), v24, v0.t     // gflag_base[v[]] |  | ||||||
|   vwaddu.vv     v8, v16, v24, v0.t |  | ||||||
|   vmv.v.i       v16, 0 |  | ||||||
|   vsetvli       zero, zero, e8, mf2, ta, mu |  | ||||||
|   vluxei16.v    v16, (t0), v8, v0.t      // gff_base[i] |  | ||||||
|   vxor.vv       v20, v16, v20 |  | ||||||
|   slli          t5, t6, 5 |  | ||||||
|   add           a2, a2, t5               // v += 32 * vlen |  | ||||||
|   slli          t5, t6, 3 |  | ||||||
|   add           a3, a3, t5               // src += 8 * vlen |  | ||||||
|   sub           a1, a1, t6               // vlen |  | ||||||
|   bnez          a1, 1b                   // for (j = 0; j < vlen; j++) |  | ||||||
|  |  | ||||||
|   vsetvli       zero, t4, e8, mf2, ta, mu |  | ||||||
|   vmv.s.x       v8, zero |  | ||||||
|   vredxor.vs    v8, v20, v8 |  | ||||||
|   vmv.x.s       t5, v8 |  | ||||||
|   addi          a0, a0, -1               // len |  | ||||||
|   mv            a3, a7                   // src |  | ||||||
|   mv            a2, a6                   // v |  | ||||||
|   mv            a1, t4                   // vlen |  | ||||||
|   addi          t2, t2, 1                // i |  | ||||||
|   sb            t5, (a4) |  | ||||||
|   vmv.v.i       v20, 0 |  | ||||||
|   addi          a4, a4, 1                // dest[i] |  | ||||||
|   bnez          a0, 1b                   // for (i = 0; i < len; i++) { |  | ||||||
|  |  | ||||||
| 3: |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -1,76 +0,0 @@ | |||||||
| /********************************************************************** |  | ||||||
|   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). |  | ||||||
|  |  | ||||||
|   Redistribution and use in source and binary forms, with or without |  | ||||||
|   modification, are permitted provided that the following conditions |  | ||||||
|   are met: |  | ||||||
|     * Redistributions of source code must retain the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer. |  | ||||||
|     * Redistributions in binary form must reproduce the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer in |  | ||||||
|       the documentation and/or other materials provided with the |  | ||||||
|       distribution. |  | ||||||
|     * Neither the name of ISCAS nor the names of its |  | ||||||
|       contributors may be used to endorse or promote products derived |  | ||||||
|       from this software without specific prior written permission. |  | ||||||
|  |  | ||||||
|   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |  | ||||||
|   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |  | ||||||
|   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |  | ||||||
|   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |  | ||||||
|   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |  | ||||||
|   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |  | ||||||
|   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |  | ||||||
|   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |  | ||||||
|   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |  | ||||||
| **********************************************************************/ |  | ||||||
| #if HAVE_RVV |  | ||||||
| #include "ec_table.S" |  | ||||||
| .option         arch, +v |  | ||||||
| .global         gf_vect_mul_rvv |  | ||||||
| .type           gf_vect_mul_rvv, %function |  | ||||||
| gf_vect_mul_rvv: |  | ||||||
|   li            t4, 32 |  | ||||||
|   rem           t4, a0, t4 |  | ||||||
|   bnez          t4, ret1                // (len % 32) != 0 |  | ||||||
|  |  | ||||||
|   lla           t0, gff_base |  | ||||||
|   lla           t1, gflog_base |  | ||||||
|   lbu           t2, 1(a1)               // unsigned char c = a[1]; |  | ||||||
|   beqz          t2, 2f |  | ||||||
|   add           t2, t1, t2              // &gflog_base[c] |  | ||||||
|   lbu           t2, (t2)                // gflog_base[c] |  | ||||||
|  |  | ||||||
| 1: |  | ||||||
|   vsetvli       t6, a0, e8, m4, ta, ma |  | ||||||
|   vle8.v        v16, (a2)               // src |  | ||||||
|   vmsne.vi      v0, v16, 0              // if b == 0 |  | ||||||
|   vluxei8.v     v16, (t1), v16, v0.t    // gflag_base[b] |  | ||||||
|   vwaddu.vx     v8, v16, t2, v0.t |  | ||||||
|   vmv.v.i       v16, 0 |  | ||||||
|   vluxei16.v    v16, (t0), v8, v0.t     // gff_base[i] |  | ||||||
|   vse8.v        v16, (a3) |  | ||||||
|   add           a2, a2, t6 |  | ||||||
|   add           a3, a3, t6 |  | ||||||
|   sub           a0, a0, t6 |  | ||||||
|   bnez          a0, 1b |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| 2: |  | ||||||
|   vsetvli       t6, a0, e8, m8, ta, ma |  | ||||||
|   vmv.v.i       v0, 0 |  | ||||||
| 3: |  | ||||||
|   vsetvli       t6, a0, e8, m8, ta, ma |  | ||||||
|   vse8.v        v0, (a3) |  | ||||||
|   add           a3, a3, t6 |  | ||||||
|   sub           a0, a0, t6 |  | ||||||
|   bnez          a0, 3b |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| ret1: |  | ||||||
|   li            a0, -1 |  | ||||||
|   ret |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -1,3 +1,31 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
| /********************************************************************** | /********************************************************************** | ||||||
|   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). |   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). | ||||||
|  |  | ||||||
| @@ -29,16 +57,9 @@ | |||||||
|  |  | ||||||
| #include "riscv64_multibinary.h" | #include "riscv64_multibinary.h" | ||||||
|  |  | ||||||
| #if HAVE_RVV | mbin_interface ec_encode_data | ||||||
| mbin_interface gf_vect_mul | mbin_interface gf_vect_mul | ||||||
| mbin_interface gf_vect_dot_prod | mbin_interface gf_vect_dot_prod | ||||||
|     mbin_interface              ec_encode_data | mbin_interface gf_vect_mad | ||||||
| #else | mbin_interface ec_encode_data_update | ||||||
|     mbin_interface_base         gf_vect_mul gf_vect_mul_base | mbin_interface ec_init_tables | ||||||
|     mbin_interface_base         gf_vect_dot_prod gf_vect_dot_prod_base |  | ||||||
|     mbin_interface_base         ec_encode_data ec_encode_data_base |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| mbin_interface_base ec_init_tables ec_init_tables_base |  | ||||||
| mbin_interface_base ec_encode_data_update ec_encode_data_update_base |  | ||||||
| mbin_interface_base gf_vect_mad gf_vect_mad_base |  | ||||||
|   | |||||||
| @@ -1,78 +0,0 @@ | |||||||
| /********************************************************************** |  | ||||||
|   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). |  | ||||||
|  |  | ||||||
|   Redistribution and use in source and binary forms, with or without |  | ||||||
|   modification, are permitted provided that the following conditions |  | ||||||
|   are met: |  | ||||||
|     * Redistributions of source code must retain the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer. |  | ||||||
|     * Redistributions in binary form must reproduce the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer in |  | ||||||
|       the documentation and/or other materials provided with the |  | ||||||
|       distribution. |  | ||||||
|     * Neither the name of ISCAS nor the names of its |  | ||||||
|       contributors may be used to endorse or promote products derived |  | ||||||
|       from this software without specific prior written permission. |  | ||||||
|  |  | ||||||
|   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |  | ||||||
|   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |  | ||||||
|   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |  | ||||||
|   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |  | ||||||
|   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |  | ||||||
|   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |  | ||||||
|   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |  | ||||||
|   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |  | ||||||
|   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |  | ||||||
| **********************************************************************/ |  | ||||||
| #include "riscv64_multibinary.h" |  | ||||||
|  |  | ||||||
| extern int |  | ||||||
| gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest); |  | ||||||
| extern int |  | ||||||
| gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest); |  | ||||||
| extern void |  | ||||||
| gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest); |  | ||||||
| extern void |  | ||||||
| gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, |  | ||||||
|                       unsigned char *dest); |  | ||||||
| extern void |  | ||||||
| ec_encode_data_rvv(int len, int srcs, int dests, unsigned char *v, unsigned char **src, |  | ||||||
|                    unsigned char **dest); |  | ||||||
| extern void |  | ||||||
| ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, |  | ||||||
|                     unsigned char **dest); |  | ||||||
|  |  | ||||||
| /* Resolve gf_vect_mul: the RVV routine when AT_HWCAP has the HWCAP_RV('V') bit set, else the base one. */ |  | ||||||
| DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) |  | ||||||
| { |  | ||||||
| #if HAVE_RVV |  | ||||||
|         /* Run-time probe; only compiled in when the build enabled RVV. */ |  | ||||||
|         if (getauxval(AT_HWCAP) & HWCAP_RV('V')) |  | ||||||
|                 return gf_vect_mul_rvv; |  | ||||||
| #endif |  | ||||||
|         return gf_vect_mul_base; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Resolve gf_vect_dot_prod: the RVV routine when AT_HWCAP has the HWCAP_RV('V') bit set, else the base one. */ |  | ||||||
| DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) |  | ||||||
| { |  | ||||||
| #if HAVE_RVV |  | ||||||
|         /* Run-time probe; only compiled in when the build enabled RVV. */ |  | ||||||
|         if (getauxval(AT_HWCAP) & HWCAP_RV('V')) |  | ||||||
|                 return gf_vect_dot_prod_rvv; |  | ||||||
| #endif |  | ||||||
|         return gf_vect_dot_prod_base; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Resolve ec_encode_data: the RVV routine when AT_HWCAP has the HWCAP_RV('V') bit set, else the base one. */ |  | ||||||
| DEFINE_INTERFACE_DISPATCHER(ec_encode_data) |  | ||||||
| { |  | ||||||
| #if HAVE_RVV |  | ||||||
|         /* Run-time probe; only compiled in when the build enabled RVV. */ |  | ||||||
|         if (getauxval(AT_HWCAP) & HWCAP_RV('V')) |  | ||||||
|                 return ec_encode_data_rvv; |  | ||||||
| #endif |  | ||||||
|         return ec_encode_data_base; |  | ||||||
| } |  | ||||||
							
								
								
									
										147
									
								
								erasure_code/riscv64/ec_riscv64_dispatcher.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								erasure_code/riscv64/ec_riscv64_dispatcher.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | |||||||
|  | /************************************************************** | ||||||
|  |   Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  |  | ||||||
|  |   Redistribution and use in source and binary forms, with or without | ||||||
|  |   modification, are permitted provided that the following conditions | ||||||
|  |   are met: | ||||||
|  |     * Redistributions of source code must retain the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer. | ||||||
|  |     * Redistributions in binary form must reproduce the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer in | ||||||
|  |       the documentation and/or other materials provided with the | ||||||
|  |       distribution. | ||||||
|  |     * Neither the name of sanechips Corporation nor the names of its | ||||||
|  |       contributors may be used to endorse or promote products derived | ||||||
|  |       from this software without specific prior written permission. | ||||||
|  |  | ||||||
|  |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  |   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  |   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  |   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  |   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  |   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  |   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  |   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  |   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  |   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  |   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | **********************************************************************/ | ||||||
|  |  | ||||||
|  | /********************************************************************** | ||||||
|  |   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). | ||||||
|  |  | ||||||
|  |   Redistribution and use in source and binary forms, with or without | ||||||
|  |   modification, are permitted provided that the following conditions | ||||||
|  |   are met: | ||||||
|  |     * Redistributions of source code must retain the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer. | ||||||
|  |     * Redistributions in binary form must reproduce the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer in | ||||||
|  |       the documentation and/or other materials provided with the | ||||||
|  |       distribution. | ||||||
|  |     * Neither the name of ISCAS nor the names of its | ||||||
|  |       contributors may be used to endorse or promote products derived | ||||||
|  |       from this software without specific prior written permission. | ||||||
|  |  | ||||||
|  |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  |   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  |   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  |   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  |   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  |   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  |   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  |   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  |   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  |   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  |   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | **********************************************************************/ | ||||||
|  |  | ||||||
|  | #include "riscv64_multibinary.h" | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Prototypes of the routines the dispatchers in this file choose between. | ||||||
|  |  * Every *_rvv / *_base pair is declared with the same parameter list. | ||||||
|  |  */ | ||||||
|  | extern void | ||||||
|  | gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                      unsigned char *dest); | ||||||
|  | extern void | ||||||
|  | gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, | ||||||
|  |                       unsigned char *dest); | ||||||
|  | extern void | ||||||
|  | gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                 unsigned char *dest); | ||||||
|  | extern void | ||||||
|  | gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, | ||||||
|  |                  unsigned char *dest); | ||||||
|  | /* Must match the definition of ec_encode_data_rvv: no vec_i, and data is an array of buffers. */ | ||||||
|  | extern void | ||||||
|  | ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, | ||||||
|  |                    unsigned char **coding); | ||||||
|  | extern void | ||||||
|  | ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, | ||||||
|  |                     unsigned char **dest); | ||||||
|  | extern int | ||||||
|  | gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest); | ||||||
|  | extern int | ||||||
|  | gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest); | ||||||
|  | extern void | ||||||
|  | ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls, | ||||||
|  |                           unsigned char *data, unsigned char **coding); | ||||||
|  | /* The update variant takes vec_i and a single source buffer, exactly like its RVV counterpart. */ | ||||||
|  | extern void | ||||||
|  | ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, | ||||||
|  |                            unsigned char *data, unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls); | ||||||
|  |  | ||||||
|  | /* Pick the gf_vect_dot_prod implementation: RVV if AT_HWCAP reports HWCAP_RV('V'), otherwise base. */ | ||||||
|  | DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) | ||||||
|  | { | ||||||
|  | #if HAVE_RVV | ||||||
|  |         /* Bit clear in the auxiliary vector: stay on the base routine. */ | ||||||
|  |         if (!(getauxval(AT_HWCAP) & HWCAP_RV('V'))) | ||||||
|  |                 return gf_vect_dot_prod_base; | ||||||
|  |         return gf_vect_dot_prod_rvv; | ||||||
|  | #else | ||||||
|  |         return gf_vect_dot_prod_base; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Pick the gf_vect_mad implementation: RVV if AT_HWCAP reports HWCAP_RV('V'), otherwise base. */ | ||||||
|  | DEFINE_INTERFACE_DISPATCHER(gf_vect_mad) | ||||||
|  | { | ||||||
|  | #if HAVE_RVV | ||||||
|  |         /* Bit clear in the auxiliary vector: stay on the base routine. */ | ||||||
|  |         if (!(getauxval(AT_HWCAP) & HWCAP_RV('V'))) | ||||||
|  |                 return gf_vect_mad_base; | ||||||
|  |         return gf_vect_mad_rvv; | ||||||
|  | #else | ||||||
|  |         return gf_vect_mad_base; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Pick the ec_encode_data implementation: RVV if AT_HWCAP reports HWCAP_RV('V'), otherwise base. */ | ||||||
|  | DEFINE_INTERFACE_DISPATCHER(ec_encode_data) | ||||||
|  | { | ||||||
|  | #if HAVE_RVV | ||||||
|  |         /* Bit clear in the auxiliary vector: stay on the base routine. */ | ||||||
|  |         if (!(getauxval(AT_HWCAP) & HWCAP_RV('V'))) | ||||||
|  |                 return ec_encode_data_base; | ||||||
|  |         return ec_encode_data_rvv; | ||||||
|  | #else | ||||||
|  |         return ec_encode_data_base; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Pick the ec_encode_data_update implementation: RVV if AT_HWCAP reports HWCAP_RV('V'), otherwise base. */ | ||||||
|  | DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) | ||||||
|  | { | ||||||
|  | #if HAVE_RVV | ||||||
|  |         /* Bit clear in the auxiliary vector: stay on the base routine. */ | ||||||
|  |         if (!(getauxval(AT_HWCAP) & HWCAP_RV('V'))) | ||||||
|  |                 return ec_encode_data_update_base; | ||||||
|  |         return ec_encode_data_update_rvv; | ||||||
|  | #else | ||||||
|  |         return ec_encode_data_update_base; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Pick the gf_vect_mul implementation: RVV if AT_HWCAP reports HWCAP_RV('V'), otherwise base. */ | ||||||
|  | DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) | ||||||
|  | { | ||||||
|  | #if HAVE_RVV | ||||||
|  |         /* Bit clear in the auxiliary vector: stay on the base routine. */ | ||||||
|  |         if (!(getauxval(AT_HWCAP) & HWCAP_RV('V'))) | ||||||
|  |                 return gf_vect_mul_base; | ||||||
|  |         return gf_vect_mul_rvv; | ||||||
|  | #else | ||||||
|  |         return gf_vect_mul_base; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* ec_init_tables is not probed at run time: it always resolves to ec_init_tables_base. */ | ||||||
|  | DEFINE_INTERFACE_DISPATCHER(ec_init_tables) | ||||||
|  | { | ||||||
|  |         return ec_init_tables_base; | ||||||
|  | } | ||||||
							
								
								
									
										188
									
								
								erasure_code/riscv64/ec_riscv64_highlevel_func.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								erasure_code/riscv64/ec_riscv64_highlevel_func.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,188 @@ | |||||||
|  | /************************************************************** | ||||||
|  |   Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  |  | ||||||
|  |   Redistribution and use in source and binary forms, with or without | ||||||
|  |   modification, are permitted provided that the following conditions | ||||||
|  |   are met: | ||||||
|  |     * Redistributions of source code must retain the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer. | ||||||
|  |     * Redistributions in binary form must reproduce the above copyright | ||||||
|  |       notice, this list of conditions and the following disclaimer in | ||||||
|  |       the documentation and/or other materials provided with the | ||||||
|  |       distribution. | ||||||
|  |     * Neither the name of sanechips Corporation nor the names of its | ||||||
|  |       contributors may be used to endorse or promote products derived | ||||||
|  |       from this software without specific prior written permission. | ||||||
|  |  | ||||||
|  |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  |   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  |   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  |   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  |   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  |   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  |   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  |   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  |   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  |   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  |   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | **********************************************************************/ | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | #include "erasure_code.h" | ||||||
|  |  | ||||||
|  | /*external function*/ | ||||||
|  |  | ||||||
|  | /* RVV */ | ||||||
|  | extern void | ||||||
|  | gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                      unsigned char *dest); | ||||||
|  | extern void | ||||||
|  | gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_8vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, unsigned char **src, | ||||||
|  |                       unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                 unsigned char *dest); | ||||||
|  | extern void | ||||||
|  | gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                  unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                  unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                  unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                  unsigned char **dest); | ||||||
|  | extern void | ||||||
|  | gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | ||||||
|  |                  unsigned char **dest); | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Invoke the one dot-product kernel that produces exactly n outputs. | ||||||
|  |  * Handles 1 <= n <= 7; any other n is a no-op. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | ec_rvv_dot_prod_rows(int n, int len, int k, unsigned char *g_tbls, unsigned char **data, | ||||||
|  |                      unsigned char **coding) | ||||||
|  | { | ||||||
|  |         switch (n) { | ||||||
|  |         case 1: | ||||||
|  |                 /* the single-output kernel takes the buffer itself, not the pointer array */ | ||||||
|  |                 gf_vect_dot_prod_rvv(len, k, g_tbls, data, coding[0]); | ||||||
|  |                 break; | ||||||
|  |         case 2: | ||||||
|  |                 gf_2vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 break; | ||||||
|  |         case 3: | ||||||
|  |                 gf_3vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 break; | ||||||
|  |         case 4: | ||||||
|  |                 gf_4vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 break; | ||||||
|  |         case 5: | ||||||
|  |                 gf_5vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 break; | ||||||
|  |         case 6: | ||||||
|  |                 gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 break; | ||||||
|  |         case 7: | ||||||
|  |                 gf_7vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 break; | ||||||
|  |         default: | ||||||
|  |                 break; | ||||||
|  |         } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Encode `rows` output buffers from `k` source buffers of `len` bytes each. | ||||||
|  |  * | ||||||
|  |  * len < 16 is handed to ec_encode_data_base unchanged. Otherwise the rows are | ||||||
|  |  * processed in batches: groups of 6 while more than 11 remain, then 8..11 rows | ||||||
|  |  * as (rows - 4) followed by 4, and 1..7 rows in a single kernel call. | ||||||
|  |  * After a batch of n rows, g_tbls advances by n * k * 32 bytes and coding by n. | ||||||
|  |  * rows <= 0 does nothing. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | ec_encode_data_rvv(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, | ||||||
|  |                    unsigned char **coding) | ||||||
|  | { | ||||||
|  |         if (len < 16) { | ||||||
|  |                 ec_encode_data_base(len, k, rows, g_tbls, data, coding); | ||||||
|  |                 return; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         /* Peel off six rows at a time until at most 11 are left. */ | ||||||
|  |         for (; rows > 11; rows -= 6) { | ||||||
|  |                 gf_6vect_dot_prod_rvv(len, k, g_tbls, data, coding); | ||||||
|  |                 coding += 6; | ||||||
|  |                 g_tbls += 6 * k * 32; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         /* 8..11 rows: first (rows - 4) rows, i.e. 4..7, then the trailing 4. */ | ||||||
|  |         if (rows >= 8) { | ||||||
|  |                 const int head = rows - 4; | ||||||
|  |  | ||||||
|  |                 ec_rvv_dot_prod_rows(head, len, k, g_tbls, data, coding); | ||||||
|  |                 coding += head; | ||||||
|  |                 g_tbls += head * k * 32; | ||||||
|  |                 rows = 4; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         ec_rvv_dot_prod_rows(rows, len, k, g_tbls, data, coding); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Update `rows` output buffers with the contribution of source buffer `data` | ||||||
|  |  * (index vec_i of k), `len` bytes each. | ||||||
|  |  * | ||||||
|  |  * len < 16 is handed to ec_encode_data_update_base unchanged. Otherwise rows | ||||||
|  |  * are processed six at a time while more than six remain, and the last 1..6 | ||||||
|  |  * rows go to the multiply-add kernel of that width. After each group of six, | ||||||
|  |  * g_tbls advances by 6 * k * 32 bytes and coding by 6. rows <= 0 does nothing. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | ec_encode_data_update_rvv(int len, int k, int rows, int vec_i, unsigned char *g_tbls, | ||||||
|  |                           unsigned char *data, unsigned char **coding) | ||||||
|  | { | ||||||
|  |         if (len < 16) { | ||||||
|  |                 ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); | ||||||
|  |                 return; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         /* Full groups of six, as long as something is left for the tail below. */ | ||||||
|  |         for (; rows > 6; rows -= 6) { | ||||||
|  |                 gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); | ||||||
|  |                 coding += 6; | ||||||
|  |                 g_tbls += 6 * k * 32; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         /* Tail: 1..6 rows remain (or none when rows was not positive). */ | ||||||
|  |         if (rows == 6) | ||||||
|  |                 gf_6vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); | ||||||
|  |         else if (rows == 5) | ||||||
|  |                 gf_5vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); | ||||||
|  |         else if (rows == 4) | ||||||
|  |                 gf_4vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); | ||||||
|  |         else if (rows == 3) | ||||||
|  |                 gf_3vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); | ||||||
|  |         else if (rows == 2) | ||||||
|  |                 gf_2vect_mad_rvv(len, k, vec_i, g_tbls, data, coding); | ||||||
|  |         else if (rows == 1) | ||||||
|  |                 /* the single-output kernel takes the buffer itself, not the pointer array */ | ||||||
|  |                 gf_vect_mad_rvv(len, k, vec_i, g_tbls, data, coding[0]); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -1,88 +0,0 @@ | |||||||
| /********************************************************************** |  | ||||||
|   Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS). |  | ||||||
|  |  | ||||||
|   Redistribution and use in source and binary forms, with or without |  | ||||||
|   modification, are permitted provided that the following conditions |  | ||||||
|   are met: |  | ||||||
|     * Redistributions of source code must retain the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer. |  | ||||||
|     * Redistributions in binary form must reproduce the above copyright |  | ||||||
|       notice, this list of conditions and the following disclaimer in |  | ||||||
|       the documentation and/or other materials provided with the |  | ||||||
|       distribution. |  | ||||||
|     * Neither the name of ISCAS nor the names of its |  | ||||||
|       contributors may be used to endorse or promote products derived |  | ||||||
|       from this software without specific prior written permission. |  | ||||||
|  |  | ||||||
|   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |  | ||||||
|   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |  | ||||||
|   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |  | ||||||
|   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |  | ||||||
|   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |  | ||||||
|   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |  | ||||||
|   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |  | ||||||
|   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |  | ||||||
|   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |  | ||||||
|   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |  | ||||||
| **********************************************************************/ |  | ||||||
| // Reference: https://git.ffmpeg.org/gitweb/ffmpeg.git/commit/746f1ff36ac0d232687820fbde4e4efc79093af7 |  | ||||||
| // const sym [, align [, relocate]] : open a named constant-data object. |  | ||||||
| //   sym      - label emitted at the start of the object |  | ||||||
| //   align    - operand handed to .align (default 3) |  | ||||||
| //   relocate - non-zero selects .data.rel.ro, zero (default) selects .rodata |  | ||||||
| // The current section is saved with .pushsection; close the object with endconst. |  | ||||||
| .macro const sym, align=3, relocate=0 |  | ||||||
|         .if \relocate |  | ||||||
|                 .pushsection .data.rel.ro |  | ||||||
|         .else |  | ||||||
|                 .pushsection .rodata |  | ||||||
|         .endif |  | ||||||
|         .align \align |  | ||||||
|         \sym: |  | ||||||
|  |  | ||||||
|         // endconst is defined per object: it records the size of \sym, restores the |  | ||||||
|         // saved section and then removes itself so the next const can define it again. |  | ||||||
|         .macro endconst |  | ||||||
|                 .size  \sym, . - \sym |  | ||||||
|                 .popsection |  | ||||||
|                 .purgem endconst |  | ||||||
|         .endm |  | ||||||
| .endm |  | ||||||
|  |  | ||||||
| // gff_base: 511 bytes = the 255-byte sequence below emitted twice (.rept 2) plus a final 0x01. |  | ||||||
| // The sequence starts 0x01, 0x02, 0x04, ... and continues 0x80 -> 0x1d, i.e. it looks like |  | ||||||
| // successive powers of 2 in GF(2^8). NOTE(review): the doubled layout presumably lets callers |  | ||||||
| // index with a sum of two logs without reducing it - confirm against the users of this table. |  | ||||||
| const gff_base |  | ||||||
|         .rept 2 |  | ||||||
|         .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13 |  | ||||||
|         .byte 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30 |  | ||||||
|         .byte 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee |  | ||||||
|         .byte 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2 |  | ||||||
|         .byte 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89 |  | ||||||
|         .byte 0x0f, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1 |  | ||||||
|         .byte 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d |  | ||||||
|         .byte 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93 |  | ||||||
|         .byte 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda |  | ||||||
|         .byte 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4 |  | ||||||
|         .byte 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6 |  | ||||||
|         .byte 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b |  | ||||||
|         .byte 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32 |  | ||||||
|         .byte 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2 |  | ||||||
|         .byte 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, 0x12 |  | ||||||
|         .byte 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16 |  | ||||||
|         .byte 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e |  | ||||||
|         .endr |  | ||||||
|         .byte 0x01 |  | ||||||
| endconst |  | ||||||
|  |  | ||||||
| // gflog_base: 256 bytes, one entry per byte value. The entries are the positions of the |  | ||||||
| // values in gff_base (e.g. entry 2 is 0x01 and entry 3 is 0x19, where gff_base holds 0x02 |  | ||||||
| // and 0x03), so this reads as the matching log table. Entry 0 is 0x00 and entry 1 is 0xff. |  | ||||||
| const gflog_base |  | ||||||
|         .byte 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7 |  | ||||||
|         .byte 0x4b, 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08 |  | ||||||
|         .byte 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0 |  | ||||||
|         .byte 0x12, 0x82, 0x45, 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78 |  | ||||||
|         .byte 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25 |  | ||||||
|         .byte 0xb3, 0x10, 0x91, 0x22, 0x88, 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2 |  | ||||||
|         .byte 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b |  | ||||||
|         .byte 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b |  | ||||||
|         .byte 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63 |  | ||||||
|         .byte 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8 |  | ||||||
|         .byte 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, 0x37, 0x3f, 0xd1, 0x5b, 0x95 |  | ||||||
|         .byte 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab |  | ||||||
|         .byte 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, 0x1f, 0x2d, 0x43 |  | ||||||
|         .byte 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1 |  | ||||||
|         .byte 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, 0xcb |  | ||||||
|         .byte 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7 |  | ||||||
|         .byte 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58 |  | ||||||
|         .byte 0xaf |  | ||||||
| endconst |  | ||||||
							
								
								
									
										161
									
								
								erasure_code/riscv64/gf_2vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										161
									
								
								erasure_code/riscv64/gf_2vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,161 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_2vect_dot_prod_rvv | ||||||
|  | .type gf_2vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_2vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, | ||||||
|  |  *                       unsigned char **src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * GF(2^8) dot product of vlen source buffers into two destination | ||||||
|  |  * buffers.  For every source i two 32-byte table entries are used: | ||||||
|  |  *   dest1: gftbls + i * 32 | ||||||
|  |  *   dest2: gftbls + vlen * 32 + i * 32 | ||||||
|  |  * Each entry is 16 bytes indexed by the low nibble followed by 16 bytes | ||||||
|  |  * indexed by the high nibble of the source byte. | ||||||
|  |  * | ||||||
|  |  * Returns in a0: 0 on success, 1 when len < 16 (nothing is written). | ||||||
|  |  * | ||||||
|  |  * Exactly len bytes are read from each source and written to each | ||||||
|  |  * destination: the vector length is re-derived from the remaining byte | ||||||
|  |  * count on every chunk, so a len that is not a multiple of VLMAX no | ||||||
|  |  * longer runs past the end of the buffers. | ||||||
|  |  * | ||||||
|  |  * The inner loop is entered before vlen is tested, so vlen is expected | ||||||
|  |  * to be at least 1. | ||||||
|  |  * | ||||||
|  |  * Clobbers: a5-a7, t0-t6, v1-v9, v26, v27, vl, vtype. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0  /* vector length */ | ||||||
|  | #define x_vec a1  /* number of source vectors (ie. data blocks) */ | ||||||
|  | #define x_tbl a2 | ||||||
|  | #define x_src a3 | ||||||
|  | #define x_dest a4 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_vec_i t0 | ||||||
|  | #define x_ptr t1 | ||||||
|  | #define x_pos t2 | ||||||
|  |  | ||||||
|  | #define x_tbl1 t3 | ||||||
|  | #define x_tbl2 t4 | ||||||
|  | #define x_dest1 t5 | ||||||
|  | #define x_dest2 a7 | ||||||
|  | #define x_stride t6  /* x_vec * 32: distance between the two table sets */ | ||||||
|  | #define x_vl a5      /* bytes handled by the current chunk */ | ||||||
|  | #define x_tmp a6     /* scratch */ | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  |  | ||||||
|  | #define v_dest1 v4 | ||||||
|  |  | ||||||
|  | #define v_gft1_lo v5 | ||||||
|  | #define v_gft1_hi v6 | ||||||
|  |  | ||||||
|  | #define v_gft2_lo v7 | ||||||
|  | #define v_gft2_hi v8 | ||||||
|  | #define v_dest2 v9 | ||||||
|  |  | ||||||
|  | #define v_tmp_lo v26 | ||||||
|  | #define v_tmp_hi v27 | ||||||
|  |  | ||||||
|  | gf_2vect_dot_prod_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .Lreturn_fail | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |     slli x_stride, x_vec, 5    /* loop invariant: x_vec * 32 */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |  | ||||||
|  | /* Loop 1: x_len, one chunk of x_vl bytes per iteration (x_pos < x_len here) */ | ||||||
|  | .Lloop_vl: | ||||||
|  |     /* x_vl = min(remaining bytes, VLMAX); never touch bytes past x_len */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     vmv.v.i v_dest1, 0         /* clear accumulators */ | ||||||
|  |     vmv.v.i v_dest2, 0 | ||||||
|  |  | ||||||
|  |     /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */ | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     add x_tbl2, x_tbl, x_stride | ||||||
|  |  | ||||||
|  |     li x_vec_i, 0 | ||||||
|  |  | ||||||
|  | /* Loop 2: x_vec, number of source vectors (ie. data blocks) */ | ||||||
|  | .Lloop_vects: | ||||||
|  |     /* | ||||||
|  |      * Load the tables with vl = 16: each half-table is exactly 16 bytes, | ||||||
|  |      * so a wider load would read past it (and past the end of gftbls for | ||||||
|  |      * the last entry when VLEN > 128), while a shorter tail chunk would | ||||||
|  |      * leave table entries unloaded. | ||||||
|  |      */ | ||||||
|  |     vsetivli zero, 16, e8, m1, ta, ma | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |  | ||||||
|  |     /* back to the chunk length for the data path */ | ||||||
|  |     vsetvli zero, x_vl, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* load src data from src[x_vec_i] + x_pos */ | ||||||
|  |     slli x_tmp, x_vec_i, 3 | ||||||
|  |     add x_tmp, x_src, x_tmp | ||||||
|  |     ld x_ptr, 0(x_tmp) | ||||||
|  |     add x_ptr, x_ptr, x_pos | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* dest 1: table indexing (gf(2^8) multiply), then xor (gf(2^8) add) */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_lo | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_lo | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_hi | ||||||
|  |  | ||||||
|  |     addi x_vec_i, x_vec_i, 1   /* next source vector */ | ||||||
|  |     blt x_vec_i, x_vec, .Lloop_vects | ||||||
|  | /* end of Loop 2 */ | ||||||
|  |  | ||||||
|  |     /* store x_vl result bytes and advance by the same amount */ | ||||||
|  |     vse8.v v_dest1, (x_dest1) | ||||||
|  |     vse8.v v_dest2, (x_dest2) | ||||||
|  |     add x_dest1, x_dest1, x_vl | ||||||
|  |     add x_dest2, x_dest2, x_vl | ||||||
|  |  | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     blt x_pos, x_len, .Lloop_vl | ||||||
|  | /* end of Loop 1 */ | ||||||
|  |  | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .Lreturn_fail: | ||||||
|  |     li a0, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .size gf_2vect_dot_prod_rvv, .-gf_2vect_dot_prod_rvv | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										148
									
								
								erasure_code/riscv64/gf_2vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										148
									
								
								erasure_code/riscv64/gf_2vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,148 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_2vect_mad_rvv | ||||||
|  | .type gf_2vect_mad_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_2vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, | ||||||
|  |  *                  unsigned char *src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * GF(2^8) multiply-and-add of one source buffer into two destination | ||||||
|  |  * buffers: dest[j][n] ^= gf_mul(coeff_j, src[n]) for j = 0, 1. | ||||||
|  |  * The 32-byte table entry for destination j is read from | ||||||
|  |  *   gftbls + (j * vec + vec_i) * 32 | ||||||
|  |  * as 16 bytes indexed by the low nibble followed by 16 bytes indexed | ||||||
|  |  * by the high nibble of the source byte. | ||||||
|  |  * | ||||||
|  |  * Returns in a0: 0 on success, 1 when len < 16 (nothing is written). | ||||||
|  |  * | ||||||
|  |  * Exactly len bytes of src and of each destination are accessed: the | ||||||
|  |  * vector length is re-derived from the remaining byte count on every | ||||||
|  |  * chunk, so a len that is not a multiple of VLMAX no longer reads or | ||||||
|  |  * modifies bytes past the end of the buffers. | ||||||
|  |  * | ||||||
|  |  * Clobbers: a3, t0-t4, v1-v7, v17, v18, v27, vl, vtype. | ||||||
|  |  */ | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0 | ||||||
|  | #define x_vec a1 | ||||||
|  | #define x_vec_i a2 | ||||||
|  | #define x_tbl a3 | ||||||
|  | #define x_src a4 | ||||||
|  | #define x_dest a5 | ||||||
|  |  | ||||||
|  | /* returns */ | ||||||
|  | #define w_ret a0 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_pos t0 | ||||||
|  | #define x_dest2 t1 | ||||||
|  | #define x_dest1 t2 | ||||||
|  | #define x_tmp t3     /* scratch */ | ||||||
|  | #define x_vl t4      /* bytes handled by the current chunk */ | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v1  /* shares v1 with v_src: the shift is done in place */ | ||||||
|  | #define v_dest1 v3 | ||||||
|  | #define v_tmp_lo v4 | ||||||
|  | #define v_tmp_hi v5 | ||||||
|  | #define v_gft1_lo v6 | ||||||
|  | #define v_gft1_hi v7 | ||||||
|  | #define v_gft2_lo v17 | ||||||
|  | #define v_gft2_hi v18 | ||||||
|  | #define v_dest2 v27 | ||||||
|  |  | ||||||
|  | gf_2vect_mad_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .Lreturn_fail | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Load the tables with vl = 16: each half-table is exactly 16 bytes, | ||||||
|  |      * so a VLMAX-wide load would read past it (and past the end of | ||||||
|  |      * gftbls for the last entry when VLEN > 128). | ||||||
|  |      */ | ||||||
|  |     vsetivli zero, 16, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* table 1: gftbls + vec_i * 32 */ | ||||||
|  |     slli x_tmp, x_vec_i, 5 | ||||||
|  |     add x_tbl, x_tbl, x_tmp | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* table 2: one table set (vec * 32 bytes) further on */ | ||||||
|  |     slli x_tmp, x_vec, 5 | ||||||
|  |     add x_tbl, x_tbl, x_tmp | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* load dest pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | /* one chunk of x_vl bytes per iteration (x_pos < x_len here) */ | ||||||
|  | .Lloop_vl: | ||||||
|  |     /* x_vl = min(remaining bytes, VLMAX); never touch bytes past x_len */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* load src data */ | ||||||
|  |     add x_tmp, x_src, x_pos | ||||||
|  |     vle8.v v_src, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* split 4-bit lo; 4-bit hi (lo must be taken before v1 is shifted) */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* load dest data */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vle8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vle8.v v_dest2, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* dest1: table indexing (gf(2^8) multiply), then xor (gf(2^8) add) */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_tmp_lo, v_dest1 | ||||||
|  |     vxor.vv v_dest1, v_tmp_hi, v_dest1 | ||||||
|  |  | ||||||
|  |     /* dest2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_tmp_lo, v_dest2 | ||||||
|  |     vxor.vv v_dest2, v_tmp_hi, v_dest2 | ||||||
|  |  | ||||||
|  |     /* store dest data */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vse8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vse8.v v_dest2, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* advance by the number of bytes actually processed */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     blt x_pos, x_len, .Lloop_vl | ||||||
|  |  | ||||||
|  |     li w_ret, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .Lreturn_fail: | ||||||
|  |     li w_ret, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .size gf_2vect_mad_rvv, .-gf_2vect_mad_rvv | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										188
									
								
								erasure_code/riscv64/gf_3vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								erasure_code/riscv64/gf_3vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,188 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_3vect_dot_prod_rvv | ||||||
|  | .type gf_3vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_3vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, | ||||||
|  |  *                       unsigned char **src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * GF(2^8) dot product of vlen source buffers into three destination | ||||||
|  |  * buffers.  For source i and destination j the 32-byte table entry is | ||||||
|  |  * read from gftbls + (j * vlen + i) * 32, as 16 bytes indexed by the | ||||||
|  |  * low nibble followed by 16 bytes indexed by the high nibble of the | ||||||
|  |  * source byte. | ||||||
|  |  * | ||||||
|  |  * Returns in a0: 0 on success, 1 when len < 16 (nothing is written). | ||||||
|  |  * | ||||||
|  |  * Exactly len bytes are read from each source and written to each | ||||||
|  |  * destination: the vector length is re-derived from the remaining byte | ||||||
|  |  * count on every chunk, so a len that is not a multiple of VLMAX no | ||||||
|  |  * longer runs past the end of the buffers. | ||||||
|  |  * | ||||||
|  |  * The inner loop is entered before vlen is tested, so vlen is expected | ||||||
|  |  * to be at least 1. | ||||||
|  |  * | ||||||
|  |  * s0/s1 are callee-saved and are spilled to a 16-byte stack frame. | ||||||
|  |  * Clobbers: a5-a7, t0-t6, v1-v6, v8-v13, v26, v27, vl, vtype. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0  /* vector length */ | ||||||
|  | #define x_vec a1  /* number of source vectors (ie. data blocks) */ | ||||||
|  | #define x_tbl a2  /* gftbls */ | ||||||
|  | #define x_src a3  /* src */ | ||||||
|  | #define x_dest a4 /* dest */ | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_tmp t0     /* scratch */ | ||||||
|  | #define x_vec_i t1 | ||||||
|  | #define x_ptr t2 | ||||||
|  | #define x_pos t3 | ||||||
|  | #define x_tbl1 t4 | ||||||
|  | #define x_tbl2 t5 | ||||||
|  | #define x_tbl3 t6 | ||||||
|  | #define x_dest1 s0 | ||||||
|  | #define x_dest2 s1 | ||||||
|  | #define x_dest3 a5 | ||||||
|  | #define t_offset a6  /* x_vec * 32: distance between table sets */ | ||||||
|  | #define x_vl a7      /* bytes handled by the current chunk */ | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  |  | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_dest2 v5 | ||||||
|  | #define v_dest3 v6 | ||||||
|  |  | ||||||
|  | #define v_gft1_lo v8 | ||||||
|  | #define v_gft1_hi v9 | ||||||
|  | #define v_gft2_lo v10 | ||||||
|  | #define v_gft2_hi v11 | ||||||
|  | #define v_gft3_lo v12 | ||||||
|  | #define v_gft3_hi v13 | ||||||
|  |  | ||||||
|  | #define v_tmp_lo v26 | ||||||
|  | #define v_tmp_hi v27 | ||||||
|  |  | ||||||
|  | gf_3vect_dot_prod_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail (taken before the frame exists) */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .Lreturn_fail | ||||||
|  |  | ||||||
|  |     /* save callee-saved registers */ | ||||||
|  |     addi sp, sp, -16 | ||||||
|  |     sd s0, 0(sp) | ||||||
|  |     sd s1, 8(sp) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |     slli t_offset, x_vec, 5 | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |  | ||||||
|  | /* Loop 1: x_len, one chunk of x_vl bytes per iteration (x_pos < x_len here) */ | ||||||
|  | .Lloop_rvv_vl: | ||||||
|  |     /* x_vl = min(remaining bytes, VLMAX); never touch bytes past x_len */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Clear destination accumulators */ | ||||||
|  |     vmv.v.i v_dest1, 0 | ||||||
|  |     vmv.v.i v_dest2, 0 | ||||||
|  |     vmv.v.i v_dest3, 0 | ||||||
|  |  | ||||||
|  |     /* Reset table pointers */ | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     add x_tbl2, x_tbl1, t_offset | ||||||
|  |     add x_tbl3, x_tbl2, t_offset | ||||||
|  |  | ||||||
|  |     /* Loop 2: x_vec, number of source vectors (ie. data blocks) */ | ||||||
|  |     li x_vec_i, 0 | ||||||
|  | .Lloop_rvv_vl_vects: | ||||||
|  |     /* | ||||||
|  |      * Load the tables with vl = 16: each half-table is exactly 16 bytes, | ||||||
|  |      * so a wider load would read past it (and past the end of gftbls for | ||||||
|  |      * the last entry when VLEN > 128), while a shorter tail chunk would | ||||||
|  |      * leave table entries unloaded. | ||||||
|  |      */ | ||||||
|  |     vsetivli zero, 16, e8, m1, ta, ma | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |     vle8.v v_gft3_hi, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |  | ||||||
|  |     /* back to the chunk length for the data path */ | ||||||
|  |     vsetvli zero, x_vl, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Load source data from src[x_vec_i] + x_pos */ | ||||||
|  |     slli x_tmp, x_vec_i, 3 | ||||||
|  |     add x_tmp, x_src, x_tmp | ||||||
|  |     ld x_ptr, 0(x_tmp) | ||||||
|  |     add x_ptr, x_ptr, x_pos | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* Split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* dest 1: table indexing (gf(2^8) multiply), then xor (gf(2^8) add) */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_lo | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_lo | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_dest3, v_tmp_lo | ||||||
|  |     vxor.vv v_dest3, v_dest3, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* Move to next source vector */ | ||||||
|  |     addi x_vec_i, x_vec_i, 1 | ||||||
|  |     blt x_vec_i, x_vec, .Lloop_rvv_vl_vects | ||||||
|  |  | ||||||
|  |     /* Store x_vl result bytes and advance by the same amount */ | ||||||
|  |     vse8.v v_dest1, (x_dest1) | ||||||
|  |     vse8.v v_dest2, (x_dest2) | ||||||
|  |     vse8.v v_dest3, (x_dest3) | ||||||
|  |     add x_dest1, x_dest1, x_vl | ||||||
|  |     add x_dest2, x_dest2, x_vl | ||||||
|  |     add x_dest3, x_dest3, x_vl | ||||||
|  |  | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     blt x_pos, x_len, .Lloop_rvv_vl | ||||||
|  |  | ||||||
|  |     /* restore callee-saved registers */ | ||||||
|  |     ld s0, 0(sp) | ||||||
|  |     ld s1, 8(sp) | ||||||
|  |     addi sp, sp, 16 | ||||||
|  |  | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .Lreturn_fail: | ||||||
|  |     li a0, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .size gf_3vect_dot_prod_rvv, .-gf_3vect_dot_prod_rvv | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										170
									
								
								erasure_code/riscv64/gf_3vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								erasure_code/riscv64/gf_3vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_3vect_mad_rvv | ||||||
|  | .type gf_3vect_mad_rvv, @function | ||||||
|  |  | ||||||
|  | /* gf_3vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, | ||||||
|  |                    unsigned char *src, unsigned char **dest); | ||||||
|  |  */ | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0 | ||||||
|  | #define x_vec a1 | ||||||
|  | #define x_vec_i a2 | ||||||
|  | #define x_tbl a3 | ||||||
|  | #define x_src a4 | ||||||
|  | #define x_dest a5 | ||||||
|  |  | ||||||
|  | /* returns */ | ||||||
|  | #define w_ret a0 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_pos t0 | ||||||
|  | #define x_dest1 t1 | ||||||
|  | #define x_dest2 t2 | ||||||
|  | #define x_dest3 t3 | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_tmp_lo v5 | ||||||
|  | #define v_tmp_hi v6 | ||||||
|  | #define v_gft1_lo v7 | ||||||
|  | #define v_gft1_hi v8 | ||||||
|  | #define v_gft2_lo v9 | ||||||
|  | #define v_gft2_hi v10 | ||||||
|  | #define v_gft3_lo v11 | ||||||
|  | #define v_gft3_hi v12 | ||||||
|  | #define v_dest2 v19 | ||||||
|  | #define v_dest3 v20 | ||||||
|  |  | ||||||
|  | gf_3vect_mad_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li t4, 16 | ||||||
|  |     blt x_len, t4, .return_fail | ||||||
|  |  | ||||||
|  |     vsetvli t5, x0, e8, m1 | ||||||
|  |  | ||||||
|  |     /* Load table 1 */ | ||||||
|  |     slli t4, x_vec_i, 5 | ||||||
|  |     add x_tbl, x_tbl, t4 | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi t4, x_tbl, 16 | ||||||
|  |     vle8.v v_gft1_hi, (t4) | ||||||
|  |  | ||||||
|  |     /* Load table 2 */ | ||||||
|  |     slli t4, x_vec, 5 | ||||||
|  |     add x_tbl, x_tbl, t4 | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl) | ||||||
|  |     addi t4, x_tbl, 16 | ||||||
|  |     vle8.v v_gft2_hi, (t4) | ||||||
|  |  | ||||||
|  |     /* Load table 3 */ | ||||||
|  |     slli t4, x_vec, 5 | ||||||
|  |     add x_tbl, x_tbl, t4 | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl) | ||||||
|  |     addi t4, x_tbl, 16 | ||||||
|  |     vle8.v v_gft3_hi, (t4) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     /* Load destination pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | .Llooprvv_vl: | ||||||
|  |     blt x_pos, x_len, .Lloop_body | ||||||
|  |     j .return_pass | ||||||
|  |  | ||||||
|  | .Lloop_body: | ||||||
|  |     /* Load source data */ | ||||||
|  |     add t6, x_src, x_pos | ||||||
|  |     vle8.v v_src, (t6) | ||||||
|  |  | ||||||
|  |     /* Split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* load dest data */ | ||||||
|  |     add t6, x_dest1, x_pos | ||||||
|  |     vle8.v v_dest1, (t6) | ||||||
|  |     add t6, x_dest2, x_pos | ||||||
|  |     vle8.v v_dest2, (t6) | ||||||
|  |     add t6, x_dest3, x_pos | ||||||
|  |     vle8.v v_dest3, (t6) | ||||||
|  |  | ||||||
|  |     /* dest1 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_tmp_lo, v_dest1 | ||||||
|  |     vxor.vv v_dest1, v_tmp_hi, v_dest1 | ||||||
|  |  | ||||||
|  |     /* dest2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_tmp_lo, v_dest2 | ||||||
|  |     vxor.vv v_dest2, v_tmp_hi, v_dest2 | ||||||
|  |  | ||||||
|  |     /* dest3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_tmp_lo, v_dest3 | ||||||
|  |     vxor.vv v_dest3, v_tmp_hi, v_dest3 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     /* Store destination data */ | ||||||
|  |     add t6, x_dest1, x_pos | ||||||
|  |     vse8.v v_dest1, (t6) | ||||||
|  |     add t6, x_dest2, x_pos | ||||||
|  |     vse8.v v_dest2, (t6) | ||||||
|  |     add t6, x_dest3, x_pos | ||||||
|  |     vse8.v v_dest3, (t6) | ||||||
|  |  | ||||||
|  |     /* Increment position */ | ||||||
|  |     add x_pos, x_pos, t5 | ||||||
|  |  | ||||||
|  |     j .Llooprvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     li w_ret, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li w_ret, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										214
									
								
								erasure_code/riscv64/gf_4vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										214
									
								
								erasure_code/riscv64/gf_4vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,214 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_4vect_dot_prod_rvv | ||||||
|  | .type gf_4vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | /* void gf_4vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, | ||||||
|  |                              unsigned char **src, unsigned char **dest); | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0  /* vector length */ | ||||||
|  | #define x_vec a1  /* number of source vectors (ie. data blocks) */ | ||||||
|  | #define x_tbl a2 | ||||||
|  | #define x_src a3 | ||||||
|  | #define x_dest a4 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_vec_i a7 | ||||||
|  | #define x_ptr t1 | ||||||
|  | #define x_pos t2 | ||||||
|  |  | ||||||
|  | #define x_tbl1 t3 | ||||||
|  | #define x_tbl2 t4 | ||||||
|  | #define x_tbl3 t5 | ||||||
|  | #define x_tbl4 t6 | ||||||
|  | #define x_dest1 s0 | ||||||
|  | #define x_dest2 s1 | ||||||
|  | #define x_dest3 s2 | ||||||
|  | #define x_dest4 s3 | ||||||
|  | #define t_offset a5 | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  |  | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_dest2 v5 | ||||||
|  | #define v_dest3 v6 | ||||||
|  | #define v_dest4 v7 | ||||||
|  |  | ||||||
|  | #define v_gft1_lo v8 | ||||||
|  | #define v_gft1_hi v9 | ||||||
|  | #define v_gft2_lo v10 | ||||||
|  | #define v_gft2_hi v11 | ||||||
|  | #define v_gft3_lo v12 | ||||||
|  | #define v_gft3_hi v13 | ||||||
|  | #define v_gft4_lo v14 | ||||||
|  | #define v_gft4_hi v15 | ||||||
|  |  | ||||||
|  | gf_4vect_dot_prod_rvv: | ||||||
|  |     /* Fewer than 16 bytes requested: return 1 via .return_fail before any stack frame is set up */ | ||||||
|  |     li t0, 16 | ||||||
|  |     blt x_len, t0, .return_fail | ||||||
|  |  | ||||||
|  | /* Save s0-s3 in a 32-byte frame; they are reloaded at .return_pass */ | ||||||
|  |     addi sp, sp, -32 | ||||||
|  |     sd s0, 0(sp) | ||||||
|  |     sd s1, 8(sp) | ||||||
|  |     sd s2, 16(sp) | ||||||
|  |     sd s3, 24(sp) | ||||||
|  |  | ||||||
|  |     vsetvli t0, x0, e8, m1  /* 8-bit elements, LMUL=1; rs1=x0 requests the maximum vl, returned in t0 and used below as the strip size */ | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |     slli t_offset, x_vec, 5 /* t_offset = x_vec * 32: byte distance between the table sets of consecutive outputs */ | ||||||
|  |     ld x_dest1, 0(x_dest) /* fetch the four output pointers from the x_dest array */ | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |  | ||||||
|  | /* Loop 1: one strip of t0 bytes of every output per iteration */ | ||||||
|  | .Lloop_rvv_vl: | ||||||
|  |     /* Finished once x_pos >= x_len. NOTE(review): a full t0-byte strip is always processed, so a final partial strip would touch bytes past x_len -- confirm callers only pass suitable lengths */ | ||||||
|  |     bge x_pos, x_len, .return_pass | ||||||
|  |  | ||||||
|  |     /* Zero the four accumulators for this strip */ | ||||||
|  |     vmv.v.i v_dest1, 0 | ||||||
|  |     vmv.v.i v_dest2, 0 | ||||||
|  |     vmv.v.i v_dest3, 0 | ||||||
|  |     vmv.v.i v_dest4, 0 | ||||||
|  |  | ||||||
|  |     /* Rewind the table pointers: one table set per output, t_offset bytes apart */ | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     add x_tbl2, x_tbl1, t_offset | ||||||
|  |     add x_tbl3, x_tbl2, t_offset | ||||||
|  |     add x_tbl4, x_tbl3, t_offset | ||||||
|  |  | ||||||
|  |     /* Loop 2: x_vec, number of source vectors (ie. data blocks); bottom-tested, so the body runs at least once */ | ||||||
|  |     li x_vec_i, 0 | ||||||
|  | .Lloop_rvv_vl_vects: | ||||||
|  |     /* Load source data from x_src[x_vec_i] + x_pos (a6 is scratch for the pointer-array slot) */ | ||||||
|  |     slli a6, x_vec_i, 3 | ||||||
|  |     add a6,x_src,a6 | ||||||
|  |     ld x_ptr, 0(a6) | ||||||
|  |     add x_ptr,x_ptr,x_pos | ||||||
|  |  | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* Split every source byte into its low and high 4-bit halves; both are used as table indices (0..15) */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* Load gf_table's for outputs 1 and 2: low-nibble table, then high-nibble table 16 bytes later. NOTE(review): each vle8.v reads vl (= t0) bytes, i.e. more than 16 when VLEN > 128 -- confirm this over-read is acceptable */ | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     /* Same for outputs 3 and 4; every table pointer ends up 32 bytes further, at the next source's tables */ | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |     vle8.v v_gft3_hi, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, 16 | ||||||
|  |     vle8.v v_gft4_hi, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, 16 | ||||||
|  |  | ||||||
|  |     /* dest 1: accumulate lo_table[low nibble] ^ hi_table[high nibble]; v26/v27 are scratch */ | ||||||
|  |     vrgather.vv v26, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_dest1, v26 | ||||||
|  |     vxor.vv v_dest1, v_dest1, v27 | ||||||
|  |  | ||||||
|  |     /* dest 2: same lookup-and-xor with table set 2 */ | ||||||
|  |     vrgather.vv v26, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_dest2, v26 | ||||||
|  |     vxor.vv v_dest2, v_dest2, v27 | ||||||
|  |  | ||||||
|  |     /* dest 3: same lookup-and-xor with table set 3 */ | ||||||
|  |     vrgather.vv v26, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_dest3, v26 | ||||||
|  |     vxor.vv v_dest3, v_dest3, v27 | ||||||
|  |  | ||||||
|  |     /* dest 4: same lookup-and-xor with table set 4 */ | ||||||
|  |     vrgather.vv v26, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_dest4, v26 | ||||||
|  |     vxor.vv v_dest4, v_dest4, v27 | ||||||
|  |  | ||||||
|  |     /* Move to next source vector */ | ||||||
|  |     addi x_vec_i, x_vec_i, 1 | ||||||
|  |  | ||||||
|  |     /* Repeat while source vectors remain */ | ||||||
|  |     blt x_vec_i, x_vec, .Lloop_rvv_vl_vects | ||||||
|  |  | ||||||
|  |     /* Store the finished strip of each output, then advance the output pointers by the strip size t0 */ | ||||||
|  |     vse8.v v_dest1, (x_dest1) | ||||||
|  |     vse8.v v_dest2, (x_dest2) | ||||||
|  |     vse8.v v_dest3, (x_dest3) | ||||||
|  |     vse8.v v_dest4, (x_dest4) | ||||||
|  |     add x_dest1,x_dest1, t0 | ||||||
|  |     add x_dest2,x_dest2, t0 | ||||||
|  |     add x_dest3,x_dest3, t0 | ||||||
|  |     add x_dest4,x_dest4, t0 | ||||||
|  |     /* Advance the source offset by the same strip size */ | ||||||
|  |     add x_pos, x_pos, t0 | ||||||
|  |     j .Lloop_rvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  | /* Success: restore s0-s3, release the frame and return 0 */ | ||||||
|  |     ld s0, 0(sp) | ||||||
|  |     ld s1, 8(sp) | ||||||
|  |     ld s2, 16(sp) | ||||||
|  |     ld s3, 24(sp) | ||||||
|  |     addi sp, sp, 32 | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li a0, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										189
									
								
								erasure_code/riscv64/gf_4vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										189
									
								
								erasure_code/riscv64/gf_4vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,189 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_4vect_mad_rvv | ||||||
|  | .type gf_4vect_mad_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_4vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, | ||||||
|  |  *                  unsigned char *src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * Multiply-accumulate of one source buffer into four destination buffers: | ||||||
|  |  *   dest[j][i] ^= tbl_lo[j][src[i] & 0x0f] ^ tbl_hi[j][src[i] >> 4] | ||||||
|  |  * for j = 0..3 and i = 0..len-1, where tbl_lo[j] is the 16-byte table at | ||||||
|  |  * gftbls + 32 * (vec_i + j * vec) and tbl_hi[j] the 16 bytes after it. | ||||||
|  |  * | ||||||
|  |  * Returns 0 in a0 on success, 1 if len < 16 (no memory is accessed then). | ||||||
|  |  * Leaf routine: uses only a0-a6, t0-t6 and v1-v17; no stack frame. | ||||||
|  |  * Requires VLEN >= 128 so that a 16-entry table fits in one register. | ||||||
|  |  */ | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0 | ||||||
|  | #define x_vec a1 | ||||||
|  | #define x_vec_i a2 | ||||||
|  | #define x_tbl a3 | ||||||
|  | #define x_src a4 | ||||||
|  | #define x_dest a5 | ||||||
|  |  | ||||||
|  | /* returns */ | ||||||
|  | #define w_ret a0 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_pos t0        /* bytes already processed */ | ||||||
|  | #define x_dest1 t1 | ||||||
|  | #define x_dest2 t2 | ||||||
|  | #define x_dest3 t3 | ||||||
|  | #define x_dest4 t4 | ||||||
|  | #define x_tmp t5        /* address / remaining-length scratch */ | ||||||
|  | #define x_vl t6         /* bytes handled by the current strip */ | ||||||
|  | #define x_tbl_step a6   /* vec * 32: distance between table sets */ | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_tmp_lo v5 | ||||||
|  | #define v_tmp_hi v6 | ||||||
|  | #define v_gft1_lo v7 | ||||||
|  | #define v_gft1_hi v8 | ||||||
|  | #define v_gft2_lo v9 | ||||||
|  | #define v_gft2_hi v10 | ||||||
|  | #define v_gft3_lo v11 | ||||||
|  | #define v_gft3_hi v12 | ||||||
|  | #define v_gft4_lo v13 | ||||||
|  | #define v_gft4_hi v14 | ||||||
|  | #define v_dest2 v15 | ||||||
|  | #define v_dest3 v16 | ||||||
|  | #define v_dest4 v17 | ||||||
|  |  | ||||||
|  | gf_4vect_mad_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .Lreturn_fail | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Every lookup table is exactly 16 bytes.  Load them with vl = 16 so | ||||||
|  |      * that no bytes beyond a table are read when VLEN > 128. | ||||||
|  |      */ | ||||||
|  |     vsetivli zero, 16, e8, m1, ta, ma | ||||||
|  |     slli x_tbl_step, x_vec, 5 | ||||||
|  |  | ||||||
|  |     /* load table 1: gftbls + 32 * vec_i */ | ||||||
|  |     slli x_tmp, x_vec_i, 5 | ||||||
|  |     add x_tbl, x_tbl, x_tmp | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* load table 2: one table set (vec * 32 bytes) further */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* load table 3 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft3_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* load table 4 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft4_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* load dest pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | .Lloop_vl: | ||||||
|  |     bge x_pos, x_len, .Lreturn_pass | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Strip-mine: vl = min(remaining, VLMAX), so the last strip never | ||||||
|  |      * reads src or reads/writes dest beyond len.  vrgather may still index | ||||||
|  |      * all 16 table entries because its source is readable up to VLMAX | ||||||
|  |      * regardless of vl. | ||||||
|  |      */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* load src data */ | ||||||
|  |     add x_tmp, x_src, x_pos | ||||||
|  |     vle8.v v_src, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* load dest data */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vle8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vle8.v v_dest2, (x_tmp) | ||||||
|  |     add x_tmp, x_dest3, x_pos | ||||||
|  |     vle8.v v_dest3, (x_tmp) | ||||||
|  |     add x_tmp, x_dest4, x_pos | ||||||
|  |     vle8.v v_dest4, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* dest1 */ | ||||||
|  |     /* table indexing, ie. gf(2^8) multiplication */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     /* exclusive or, ie. gf(2^8) add */ | ||||||
|  |     vxor.vv v_dest1, v_tmp_lo, v_dest1 | ||||||
|  |     vxor.vv v_dest1, v_tmp_hi, v_dest1 | ||||||
|  |  | ||||||
|  |     /* dest2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_tmp_lo, v_dest2 | ||||||
|  |     vxor.vv v_dest2, v_tmp_hi, v_dest2 | ||||||
|  |  | ||||||
|  |     /* dest3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_tmp_lo, v_dest3 | ||||||
|  |     vxor.vv v_dest3, v_tmp_hi, v_dest3 | ||||||
|  |  | ||||||
|  |     /* dest4 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_tmp_lo, v_dest4 | ||||||
|  |     vxor.vv v_dest4, v_tmp_hi, v_dest4 | ||||||
|  |  | ||||||
|  |     /* store dest data */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vse8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vse8.v v_dest2, (x_tmp) | ||||||
|  |     add x_tmp, x_dest3, x_pos | ||||||
|  |     vse8.v v_dest3, (x_tmp) | ||||||
|  |     add x_tmp, x_dest4, x_pos | ||||||
|  |     vse8.v v_dest4, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* advance by the number of bytes this strip actually handled */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     j .Lloop_vl | ||||||
|  |  | ||||||
|  | .Lreturn_pass: | ||||||
|  |     li w_ret, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .Lreturn_fail: | ||||||
|  |     li w_ret, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										242
									
								
								erasure_code/riscv64/gf_5vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								erasure_code/riscv64/gf_5vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,242 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_5vect_dot_prod_rvv | ||||||
|  | .type gf_5vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_5vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, | ||||||
|  |  *                       unsigned char **src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * Computes five output buffers from vlen source buffers: | ||||||
|  |  *   dest[j][i] = XOR over k of | ||||||
|  |  *                tbl_lo[j][k][src[k][i] & 0x0f] ^ tbl_hi[j][k][src[k][i] >> 4] | ||||||
|  |  * for j = 0..4, k = 0..vlen-1 and i = 0..len-1, where tbl_lo[j][k] is the | ||||||
|  |  * 16-byte table at gftbls + 32 * (k + j * vlen) and tbl_hi[j][k] the | ||||||
|  |  * 16 bytes after it. | ||||||
|  |  * | ||||||
|  |  * Returns 0 in a0 on success, 1 if len < 16 (no memory is accessed then). | ||||||
|  |  * The source loop is bottom-tested, so src[0] is read even if vlen <= 0. | ||||||
|  |  * Uses s0-s5 (saved/restored in a 48-byte frame), a5-a7, t0-t6, | ||||||
|  |  * v1-v18 and v26-v27.  Requires VLEN >= 128. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0  /* vector length */ | ||||||
|  | #define x_vec a1  /* number of source vectors (ie. data blocks) */ | ||||||
|  | #define x_tbl a2 | ||||||
|  | #define x_src a3 | ||||||
|  | #define x_dest a4 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_vl a5        /* bytes handled by the current strip */ | ||||||
|  | #define x_tmp a6       /* address / remaining-length scratch */ | ||||||
|  | #define x_vec_i a7 | ||||||
|  | #define x_tbl_step t0  /* x_vec * 32: distance between table sets */ | ||||||
|  | #define x_ptr t1 | ||||||
|  | #define x_pos t2 | ||||||
|  | #define x_tbl1 t3 | ||||||
|  | #define x_tbl2 t4 | ||||||
|  | #define x_tbl3 t5 | ||||||
|  | #define x_tbl4 t6 | ||||||
|  |  | ||||||
|  | #define x_tbl5  s0 | ||||||
|  | #define x_dest1 s1 | ||||||
|  | #define x_dest2 s2 | ||||||
|  | #define x_dest3 s3 | ||||||
|  | #define x_dest4 s4 | ||||||
|  | #define x_dest5 s5 | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_gft1_lo v5 | ||||||
|  | #define v_gft1_hi v6 | ||||||
|  | #define v_gft2_lo v7 | ||||||
|  | #define v_gft2_hi v8 | ||||||
|  | #define v_gft3_lo v9 | ||||||
|  | #define v_gft3_hi v10 | ||||||
|  | #define v_gft4_lo v11 | ||||||
|  | #define v_gft4_hi v12 | ||||||
|  | #define v_gft5_lo v13 | ||||||
|  | #define v_gft5_hi v14 | ||||||
|  | #define v_dest2 v15 | ||||||
|  | #define v_dest3 v16 | ||||||
|  | #define v_dest4 v17 | ||||||
|  | #define v_dest5 v18 | ||||||
|  | #define v_tmp_lo v26 | ||||||
|  | #define v_tmp_hi v27 | ||||||
|  |  | ||||||
|  | gf_5vect_dot_prod_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .Lreturn_fail | ||||||
|  |  | ||||||
|  |     /* save s0-s5 (48-byte frame keeps sp 16-byte aligned) */ | ||||||
|  |     addi sp, sp, -48 | ||||||
|  |     sd s0, 0(sp) | ||||||
|  |     sd s1, 8(sp) | ||||||
|  |     sd s2, 16(sp) | ||||||
|  |     sd s3, 24(sp) | ||||||
|  |     sd s4, 32(sp) | ||||||
|  |     sd s5, 40(sp) | ||||||
|  |  | ||||||
|  |     /* Initialize position and the loop-invariant table-set distance */ | ||||||
|  |     li x_pos, 0 | ||||||
|  |     slli x_tbl_step, x_vec, 5 | ||||||
|  |  | ||||||
|  |     /* Load destination pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |     ld x_dest5, 32(x_dest) | ||||||
|  |  | ||||||
|  | /* Loop 1: x_len, one strip of x_vl bytes per iteration */ | ||||||
|  | .Lloop_vl: | ||||||
|  |     bge x_pos, x_len, .Lreturn_pass | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Strip-mine: x_vl = min(remaining, VLMAX), so the last strip never | ||||||
|  |      * reads a source or writes a destination beyond x_len. | ||||||
|  |      */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Clear destination vectors */ | ||||||
|  |     vmv.v.i v_dest1, 0 | ||||||
|  |     vmv.v.i v_dest2, 0 | ||||||
|  |     vmv.v.i v_dest3, 0 | ||||||
|  |     vmv.v.i v_dest4, 0 | ||||||
|  |     vmv.v.i v_dest5, 0 | ||||||
|  |  | ||||||
|  |     /* Reset table pointers */ | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     add x_tbl2, x_tbl1, x_tbl_step | ||||||
|  |     add x_tbl3, x_tbl2, x_tbl_step | ||||||
|  |     add x_tbl4, x_tbl3, x_tbl_step | ||||||
|  |     add x_tbl5, x_tbl4, x_tbl_step | ||||||
|  |  | ||||||
|  |     /* Loop 2: x_vec, number of source vectors (ie. data blocks) */ | ||||||
|  |     li x_vec_i, 0 | ||||||
|  | .Lloop_vl_vects: | ||||||
|  |     /* | ||||||
|  |      * Load gf_table's with vl = 16: each table is exactly 16 bytes, so | ||||||
|  |      * this neither over-reads when VLEN > 128 nor truncates a table when | ||||||
|  |      * the current strip is shorter than 16 bytes. | ||||||
|  |      */ | ||||||
|  |     vsetivli zero, 16, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |     vle8.v v_gft3_hi, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, 16 | ||||||
|  |     vle8.v v_gft4_hi, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft5_lo, (x_tbl5) | ||||||
|  |     addi x_tbl5, x_tbl5, 16 | ||||||
|  |     vle8.v v_gft5_hi, (x_tbl5) | ||||||
|  |     addi x_tbl5, x_tbl5, 16 | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Back to the strip length for the data.  x_vl <= VLMAX, so this | ||||||
|  |      * sets vl = x_vl exactly.  vrgather can still index all 16 table | ||||||
|  |      * entries because its source is readable up to VLMAX regardless of vl. | ||||||
|  |      */ | ||||||
|  |     vsetvli zero, x_vl, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Load source data: src[x_vec_i] + x_pos */ | ||||||
|  |     slli x_tmp, x_vec_i, 3 | ||||||
|  |     add x_tmp, x_src, x_tmp | ||||||
|  |     ld x_ptr, 0(x_tmp) | ||||||
|  |     add x_ptr, x_ptr, x_pos | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* Split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* Move to next source vector */ | ||||||
|  |     addi x_vec_i, x_vec_i, 1 | ||||||
|  |  | ||||||
|  |     /* dest 1: table lookup (GF(2^8) multiply) and xor (GF(2^8) add) */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_lo | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_lo | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_dest3, v_tmp_lo | ||||||
|  |     vxor.vv v_dest3, v_dest3, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 4 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_dest4, v_tmp_lo | ||||||
|  |     vxor.vv v_dest4, v_dest4, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* dest 5 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest5, v_dest5, v_tmp_lo | ||||||
|  |     vxor.vv v_dest5, v_dest5, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* Check if we have processed all vectors */ | ||||||
|  |     blt x_vec_i, x_vec, .Lloop_vl_vects | ||||||
|  |  | ||||||
|  |     /* Store destination data (vl is still x_vl here) */ | ||||||
|  |     vse8.v v_dest1, (x_dest1) | ||||||
|  |     vse8.v v_dest2, (x_dest2) | ||||||
|  |     vse8.v v_dest3, (x_dest3) | ||||||
|  |     vse8.v v_dest4, (x_dest4) | ||||||
|  |     vse8.v v_dest5, (x_dest5) | ||||||
|  |  | ||||||
|  |     /* Advance destination pointers past the bytes just written */ | ||||||
|  |     add x_dest1, x_dest1, x_vl | ||||||
|  |     add x_dest2, x_dest2, x_vl | ||||||
|  |     add x_dest3, x_dest3, x_vl | ||||||
|  |     add x_dest4, x_dest4, x_vl | ||||||
|  |     add x_dest5, x_dest5, x_vl | ||||||
|  |  | ||||||
|  |     /* Increment position */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     j .Lloop_vl | ||||||
|  |  | ||||||
|  | .Lreturn_pass: | ||||||
|  |     /* Restore callee-saved registers */ | ||||||
|  |     ld s0, 0(sp) | ||||||
|  |     ld s1, 8(sp) | ||||||
|  |     ld s2, 16(sp) | ||||||
|  |     ld s3, 24(sp) | ||||||
|  |     ld s4, 32(sp) | ||||||
|  |     ld s5, 40(sp) | ||||||
|  |     addi sp, sp, 48 | ||||||
|  |  | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .Lreturn_fail: | ||||||
|  |     li a0, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										214
									
								
								erasure_code/riscv64/gf_5vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										214
									
								
								erasure_code/riscv64/gf_5vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,214 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_5vect_mad_rvv | ||||||
|  | .type gf_5vect_mad_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_5vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, | ||||||
|  |  *                  unsigned char *src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * Multiply-accumulate of one source buffer into five destination buffers: | ||||||
|  |  *   dest[j][i] ^= tbl_lo[j][src[i] & 0x0f] ^ tbl_hi[j][src[i] >> 4] | ||||||
|  |  * for j = 0..4 and i = 0..len-1, where tbl_lo[j] is the 16-byte table at | ||||||
|  |  * gftbls + 32 * (vec_i + j * vec) and tbl_hi[j] the 16 bytes after it. | ||||||
|  |  * | ||||||
|  |  * Returns 0 in a0 on success, 1 if len < 16 (no memory is accessed then). | ||||||
|  |  * Leaf routine: uses only a0-a7, t0-t6, v1-v16 and v19-v22; no stack | ||||||
|  |  * frame.  Requires VLEN >= 128 so a 16-entry table fits in one register. | ||||||
|  |  */ | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0 | ||||||
|  | #define x_vec a1 | ||||||
|  | #define x_vec_i a2 | ||||||
|  | #define x_tbl a3 | ||||||
|  | #define x_src a4 | ||||||
|  | #define x_dest a5 | ||||||
|  |  | ||||||
|  | /* returns */ | ||||||
|  | #define w_ret a0 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_pos t0        /* bytes already processed */ | ||||||
|  | #define x_dest1 t1 | ||||||
|  | #define x_dest2 t2 | ||||||
|  | #define x_dest3 t3 | ||||||
|  | #define x_dest4 t4 | ||||||
|  | #define x_dest5 t5 | ||||||
|  | #define x_tmp t6        /* address / remaining-length scratch */ | ||||||
|  | #define x_tbl_step a6   /* vec * 32: distance between table sets */ | ||||||
|  | #define x_vl a7         /* bytes handled by the current strip */ | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_tmp_lo v5 | ||||||
|  | #define v_tmp_hi v6 | ||||||
|  | #define v_gft1_lo v7 | ||||||
|  | #define v_gft1_hi v8 | ||||||
|  | #define v_gft2_lo v9 | ||||||
|  | #define v_gft2_hi v10 | ||||||
|  | #define v_gft3_lo v11 | ||||||
|  | #define v_gft3_hi v12 | ||||||
|  | #define v_gft4_lo v13 | ||||||
|  | #define v_gft4_hi v14 | ||||||
|  | #define v_gft5_lo v15 | ||||||
|  | #define v_gft5_hi v16 | ||||||
|  | #define v_dest2 v19 | ||||||
|  | #define v_dest3 v20 | ||||||
|  | #define v_dest4 v21 | ||||||
|  | #define v_dest5 v22 | ||||||
|  |  | ||||||
|  | gf_5vect_mad_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .Lreturn_fail | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Every lookup table is exactly 16 bytes.  Load them with vl = 16 so | ||||||
|  |      * that no bytes beyond a table are read when VLEN > 128. | ||||||
|  |      */ | ||||||
|  |     vsetivli zero, 16, e8, m1, ta, ma | ||||||
|  |     slli x_tbl_step, x_vec, 5 | ||||||
|  |  | ||||||
|  |     /* Load table 1: gftbls + 32 * vec_i */ | ||||||
|  |     slli x_tmp, x_vec_i, 5 | ||||||
|  |     add x_tbl, x_tbl, x_tmp | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 2: one table set (vec * 32 bytes) further */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 3 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft3_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 4 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft4_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 5 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_step | ||||||
|  |     vle8.v v_gft5_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft5_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load destination pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |     ld x_dest5, 32(x_dest) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | .Lloop_vl: | ||||||
|  |     bge x_pos, x_len, .Lreturn_pass | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Strip-mine: vl = min(remaining, VLMAX), so the last strip never | ||||||
|  |      * reads src or reads/writes dest beyond len.  vrgather may still index | ||||||
|  |      * all 16 table entries because its source is readable up to VLMAX | ||||||
|  |      * regardless of vl. | ||||||
|  |      */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Load source data */ | ||||||
|  |     add x_tmp, x_src, x_pos | ||||||
|  |     vle8.v v_src, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* Load the current destination contents to accumulate into */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vle8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vle8.v v_dest2, (x_tmp) | ||||||
|  |     add x_tmp, x_dest3, x_pos | ||||||
|  |     vle8.v v_dest3, (x_tmp) | ||||||
|  |     add x_tmp, x_dest4, x_pos | ||||||
|  |     vle8.v v_dest4, (x_tmp) | ||||||
|  |     add x_tmp, x_dest5, x_pos | ||||||
|  |     vle8.v v_dest5, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* dest1: table lookup (GF(2^8) multiply) and xor (GF(2^8) add) */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_tmp_lo, v_dest1 | ||||||
|  |     vxor.vv v_dest1, v_tmp_hi, v_dest1 | ||||||
|  |  | ||||||
|  |     /* dest2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_tmp_lo, v_dest2 | ||||||
|  |     vxor.vv v_dest2, v_tmp_hi, v_dest2 | ||||||
|  |  | ||||||
|  |     /* dest3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_tmp_lo, v_dest3 | ||||||
|  |     vxor.vv v_dest3, v_tmp_hi, v_dest3 | ||||||
|  |  | ||||||
|  |     /* dest4 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_tmp_lo, v_dest4 | ||||||
|  |     vxor.vv v_dest4, v_tmp_hi, v_dest4 | ||||||
|  |  | ||||||
|  |     /* dest5 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest5, v_tmp_lo, v_dest5 | ||||||
|  |     vxor.vv v_dest5, v_tmp_hi, v_dest5 | ||||||
|  |  | ||||||
|  |     /* Store destination data */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vse8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vse8.v v_dest2, (x_tmp) | ||||||
|  |     add x_tmp, x_dest3, x_pos | ||||||
|  |     vse8.v v_dest3, (x_tmp) | ||||||
|  |     add x_tmp, x_dest4, x_pos | ||||||
|  |     vse8.v v_dest4, (x_tmp) | ||||||
|  |     add x_tmp, x_dest5, x_pos | ||||||
|  |     vse8.v v_dest5, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Advance by the number of bytes this strip actually handled */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |  | ||||||
|  |     j .Lloop_vl | ||||||
|  |  | ||||||
|  | .Lreturn_pass: | ||||||
|  |     li w_ret, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .Lreturn_fail: | ||||||
|  |     li w_ret, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										273
									
								
								erasure_code/riscv64/gf_6vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										273
									
								
								erasure_code/riscv64/gf_6vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,273 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_6vect_dot_prod_rvv | ||||||
|  | .type gf_6vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_6vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, | ||||||
|  |  *                       unsigned char **src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * Table-driven GF dot product of vlen source buffers into six destination | ||||||
|  |  * buffers.  For every byte position p < len and every destination d in 0..5: | ||||||
|  |  * | ||||||
|  |  *   dest[d][p] = XOR over j in 0..vlen-1 of | ||||||
|  |  *                lo(d,j)[src[j][p] & 0x0F] ^ hi(d,j)[src[j][p] >> 4] | ||||||
|  |  * | ||||||
|  |  * The 32-byte table of pair (d,j) starts at gftbls + (d * vlen + j) * 32; | ||||||
|  |  * lo is its first 16 bytes and hi is its last 16 bytes. | ||||||
|  |  * | ||||||
|  |  * Status in a0: 1 when len < 16 (nothing is read or written), 0 otherwise. | ||||||
|  |  * | ||||||
|  |  * Every strip runs with vl = min(len - pos, VLMAX), so exactly len bytes of | ||||||
|  |  * each source are read and exactly len bytes of each destination are | ||||||
|  |  * written regardless of the hardware VLEN.  The lookup tables are loaded | ||||||
|  |  * with vl = 16 so that only the 16 bytes of each table half are touched. | ||||||
|  |  * | ||||||
|  |  * Assumes vlen >= 1: the source loop body runs before its exit test. | ||||||
|  |  * Clobbers t0-t6, a5-a7, v1-v21, v26, v27 and vl/vtype.  s0-s7 are saved on | ||||||
|  |  * the stack and restored before returning. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* constants */ | ||||||
|  | #define GF_MIN_LEN 16     /* shortest length handled here */ | ||||||
|  | #define GF_TBL_HALF 16    /* bytes in one lo/hi lookup table */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0  /* number of bytes per buffer */ | ||||||
|  | #define x_vec a1  /* number of source vectors (ie. data blocks) */ | ||||||
|  | #define x_tbl a2  /* gftbls */ | ||||||
|  | #define x_src a3  /* src */ | ||||||
|  | #define x_dest a4 /* dest */ | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_vl a5      /* bytes processed by the current strip */ | ||||||
|  | #define x_vec_i a7   /* loop counter for source vectors */ | ||||||
|  | #define x_ptr t1     /* pointer into the current source */ | ||||||
|  | #define x_pos t2     /* byte position of the current strip */ | ||||||
|  | #define x_tbl1 t3    /* table pointer 1 */ | ||||||
|  | #define x_tbl2 t4    /* table pointer 2 */ | ||||||
|  | #define x_tbl3 t5    /* table pointer 3 */ | ||||||
|  | #define x_tbl4 t6    /* table pointer 4 */ | ||||||
|  | #define x_tbl5 s0    /* table pointer 5 */ | ||||||
|  | #define x_tbl6 s1    /* table pointer 6 */ | ||||||
|  | #define x_dest1 s2   /* dest pointer 1 */ | ||||||
|  | #define x_dest2 s3   /* dest pointer 2 */ | ||||||
|  | #define x_dest3 s4   /* dest pointer 3 */ | ||||||
|  | #define x_dest4 s5   /* dest pointer 4 */ | ||||||
|  | #define x_dest5 s6   /* dest pointer 5 */ | ||||||
|  | #define x_dest6 s7   /* dest pointer 6 */ | ||||||
|  |  | ||||||
|  | /* vector registers */ | ||||||
|  | #define v_src v1      /* source bytes */ | ||||||
|  | #define v_src_lo v2   /* low 4 bits of source */ | ||||||
|  | #define v_src_hi v3   /* high 4 bits of source */ | ||||||
|  | #define v_dest1 v4    /* accumulator for destination 1 */ | ||||||
|  | #define v_dest2 v5    /* accumulator for destination 2 */ | ||||||
|  | #define v_dest3 v6    /* accumulator for destination 3 */ | ||||||
|  | #define v_dest4 v7    /* accumulator for destination 4 */ | ||||||
|  | #define v_dest5 v8    /* accumulator for destination 5 */ | ||||||
|  | #define v_dest6 v9    /* accumulator for destination 6 */ | ||||||
|  | #define v_gft1_lo v10 /* gf table 1 low */ | ||||||
|  | #define v_gft1_hi v11 /* gf table 1 high */ | ||||||
|  | #define v_gft2_lo v12 /* gf table 2 low */ | ||||||
|  | #define v_gft2_hi v13 /* gf table 2 high */ | ||||||
|  | #define v_gft3_lo v14 /* gf table 3 low */ | ||||||
|  | #define v_gft3_hi v15 /* gf table 3 high */ | ||||||
|  | #define v_gft4_lo v16 /* gf table 4 low */ | ||||||
|  | #define v_gft4_hi v17 /* gf table 4 high */ | ||||||
|  | #define v_gft5_lo v18 /* gf table 5 low */ | ||||||
|  | #define v_gft5_hi v19 /* gf table 5 high */ | ||||||
|  | #define v_gft6_lo v20 /* gf table 6 low */ | ||||||
|  | #define v_gft6_hi v21 /* gf table 6 high */ | ||||||
|  | #define v_tmp_lo v26  /* lookup result for the low nibbles */ | ||||||
|  | #define v_tmp_hi v27  /* lookup result for the high nibbles */ | ||||||
|  |  | ||||||
|  | gf_6vect_dot_prod_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail (no stack frame exists yet) */ | ||||||
|  |     li t0, GF_MIN_LEN | ||||||
|  |     blt x_len, t0, .return_fail | ||||||
|  |  | ||||||
|  |     /* save callee-saved registers; 64 bytes keeps sp 16-byte aligned */ | ||||||
|  |     addi sp, sp, -64 | ||||||
|  |     sd s0, 0(sp) | ||||||
|  |     sd s1, 8(sp) | ||||||
|  |     sd s2, 16(sp) | ||||||
|  |     sd s3, 24(sp) | ||||||
|  |     sd s4, 32(sp) | ||||||
|  |     sd s5, 40(sp) | ||||||
|  |     sd s6, 48(sp) | ||||||
|  |     sd s7, 56(sp) | ||||||
|  |  | ||||||
|  |     /* initialize position */ | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  |     /* load destination pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |     ld x_dest5, 32(x_dest) | ||||||
|  |     ld x_dest6, 40(x_dest) | ||||||
|  |  | ||||||
|  | /* Loop 1: one strip of x_vl bytes per iteration */ | ||||||
|  | .Llooprvv_vl: | ||||||
|  |     /* check if we have processed all elements */ | ||||||
|  |     bge x_pos, x_len, .return_pass | ||||||
|  |  | ||||||
|  |     /* x_vl = min(len - pos, VLMAX): the last strip may be shorter */ | ||||||
|  |     sub t0, x_len, x_pos | ||||||
|  |     vsetvli x_vl, t0, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* clear destination accumulators */ | ||||||
|  |     vmv.v.i v_dest1, 0 | ||||||
|  |     vmv.v.i v_dest2, 0 | ||||||
|  |     vmv.v.i v_dest3, 0 | ||||||
|  |     vmv.v.i v_dest4, 0 | ||||||
|  |     vmv.v.i v_dest5, 0 | ||||||
|  |     vmv.v.i v_dest6, 0 | ||||||
|  |  | ||||||
|  |     /* initialize vector loop counter */ | ||||||
|  |     li x_vec_i, 0 | ||||||
|  |  | ||||||
|  |     /* initialize table pointers */ | ||||||
|  |     /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */ | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     slli t0, x_vec, 5 | ||||||
|  |     add x_tbl2, x_tbl1, t0 | ||||||
|  |     add x_tbl3, x_tbl2, t0 | ||||||
|  |     add x_tbl4, x_tbl3, t0 | ||||||
|  |     add x_tbl5, x_tbl4, t0 | ||||||
|  |     add x_tbl6, x_tbl5, t0 | ||||||
|  |  | ||||||
|  | /* Loop 2: one source vector per iteration */ | ||||||
|  | .Llooprvv_vl_vects: | ||||||
|  |     /* load gf_table's with vl = 16: each half is exactly 16 bytes and */ | ||||||
|  |     /* vrgather may index any of them even when the strip is shorter */ | ||||||
|  |     vsetivli zero, GF_TBL_HALF, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft1_hi, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, GF_TBL_HALF | ||||||
|  |  | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft2_hi, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, GF_TBL_HALF | ||||||
|  |  | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft3_hi, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, GF_TBL_HALF | ||||||
|  |  | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft4_hi, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, GF_TBL_HALF | ||||||
|  |  | ||||||
|  |     vle8.v v_gft5_lo, (x_tbl5) | ||||||
|  |     addi x_tbl5, x_tbl5, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft5_hi, (x_tbl5) | ||||||
|  |     addi x_tbl5, x_tbl5, GF_TBL_HALF | ||||||
|  |  | ||||||
|  |     vle8.v v_gft6_lo, (x_tbl6) | ||||||
|  |     addi x_tbl6, x_tbl6, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft6_hi, (x_tbl6) | ||||||
|  |     addi x_tbl6, x_tbl6, GF_TBL_HALF | ||||||
|  |  | ||||||
|  |     /* back to the strip length for the data path */ | ||||||
|  |     vsetvli zero, x_vl, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* load source data: x_ptr = src[x_vec_i] + x_pos */ | ||||||
|  |     slli a6, x_vec_i, 3 | ||||||
|  |     add a6, x_src, a6 | ||||||
|  |     ld x_ptr, 0(a6) | ||||||
|  |     add x_ptr, x_ptr, x_pos | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest1 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_lo | ||||||
|  |     vxor.vv v_dest1, v_dest1, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_lo | ||||||
|  |     vxor.vv v_dest2, v_dest2, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_dest3, v_tmp_lo | ||||||
|  |     vxor.vv v_dest3, v_dest3, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest4 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_dest4, v_tmp_lo | ||||||
|  |     vxor.vv v_dest4, v_dest4, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest5 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest5, v_dest5, v_tmp_lo | ||||||
|  |     vxor.vv v_dest5, v_dest5, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest6 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft6_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft6_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest6, v_dest6, v_tmp_lo | ||||||
|  |     vxor.vv v_dest6, v_dest6, v_tmp_hi | ||||||
|  |  | ||||||
|  |     /* next source vector */ | ||||||
|  |     addi x_vec_i, x_vec_i, 1 | ||||||
|  |  | ||||||
|  |     /* check if we have processed all vectors */ | ||||||
|  |     blt x_vec_i, x_vec, .Llooprvv_vl_vects | ||||||
|  |  | ||||||
|  |     /* store destination data (x_vl bytes each) */ | ||||||
|  |     vse8.v v_dest1, (x_dest1) | ||||||
|  |     vse8.v v_dest2, (x_dest2) | ||||||
|  |     vse8.v v_dest3, (x_dest3) | ||||||
|  |     vse8.v v_dest4, (x_dest4) | ||||||
|  |     vse8.v v_dest5, (x_dest5) | ||||||
|  |     vse8.v v_dest6, (x_dest6) | ||||||
|  |  | ||||||
|  |     /* advance the destination pointers by the bytes just written */ | ||||||
|  |     add x_dest1, x_dest1, x_vl | ||||||
|  |     add x_dest2, x_dest2, x_vl | ||||||
|  |     add x_dest3, x_dest3, x_vl | ||||||
|  |     add x_dest4, x_dest4, x_vl | ||||||
|  |     add x_dest5, x_dest5, x_vl | ||||||
|  |     add x_dest6, x_dest6, x_vl | ||||||
|  |  | ||||||
|  |     /* increment position */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     j .Llooprvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     /* restore callee-saved registers */ | ||||||
|  |     ld s0, 0(sp) | ||||||
|  |     ld s1, 8(sp) | ||||||
|  |     ld s2, 16(sp) | ||||||
|  |     ld s3, 24(sp) | ||||||
|  |     ld s4, 32(sp) | ||||||
|  |     ld s5, 40(sp) | ||||||
|  |     ld s6, 48(sp) | ||||||
|  |     ld s7, 56(sp) | ||||||
|  |     addi sp, sp, 64 | ||||||
|  |  | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     /* reached before the stack frame is created: nothing to restore */ | ||||||
|  |     li a0, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										241
									
								
								erasure_code/riscv64/gf_6vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										241
									
								
								erasure_code/riscv64/gf_6vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,241 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_6vect_mad_rvv | ||||||
|  | .type gf_6vect_mad_rvv, @function | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * gf_6vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, | ||||||
|  |  *                  unsigned char *src, unsigned char **dest); | ||||||
|  |  * | ||||||
|  |  * Table-driven GF multiply-and-add of one source buffer into six | ||||||
|  |  * destination buffers.  For every byte position p < len and every | ||||||
|  |  * destination d in 0..5: | ||||||
|  |  * | ||||||
|  |  *   dest[d][p] ^= lo(d)[src[p] & 0x0F] ^ hi(d)[src[p] >> 4] | ||||||
|  |  * | ||||||
|  |  * The 32-byte table of destination d starts at | ||||||
|  |  * gftbls + (d * vec + vec_i) * 32; lo is its first 16 bytes and hi is its | ||||||
|  |  * last 16 bytes. | ||||||
|  |  * | ||||||
|  |  * Status in a0: 1 when len < 16 (nothing is read or written), 0 otherwise. | ||||||
|  |  * | ||||||
|  |  * Every strip runs with vl = min(len - pos, VLMAX), so exactly len bytes of | ||||||
|  |  * the source and of each destination are accessed regardless of the | ||||||
|  |  * hardware VLEN.  The lookup tables are loaded once, with vl = 16, so that | ||||||
|  |  * only the 16 bytes of each table half are touched. | ||||||
|  |  * | ||||||
|  |  * Leaf routine without a stack frame: only caller-saved registers are used. | ||||||
|  |  * Clobbers t0-t6, a1, a3, a6, a7, v1-v23 and vl/vtype. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* constants */ | ||||||
|  | #define GF_MIN_LEN 16     /* shortest length handled here */ | ||||||
|  | #define GF_TBL_HALF 16    /* bytes in one lo/hi lookup table */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0     /* number of bytes per buffer */ | ||||||
|  | #define x_vec a1     /* number of source vectors */ | ||||||
|  | #define x_vec_i a2   /* index of this source vector */ | ||||||
|  | #define x_tbl a3     /* gftbls */ | ||||||
|  | #define x_src a4     /* src */ | ||||||
|  | #define x_dest a5    /* dest */ | ||||||
|  |  | ||||||
|  | /* returns */ | ||||||
|  | #define w_ret a0 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_pos t0         /* byte position of the current strip */ | ||||||
|  | #define x_dest1 t1 | ||||||
|  | #define x_dest2 t2 | ||||||
|  | #define x_dest3 t3 | ||||||
|  | #define x_dest4 t4 | ||||||
|  | #define x_dest5 t5 | ||||||
|  | #define x_dest6 t6 | ||||||
|  | #define x_tbl_stride a1  /* vec * 32, reuses x_vec once it is consumed */ | ||||||
|  | #define x_vl a6          /* bytes processed by the current strip */ | ||||||
|  | #define x_tmp a7         /* scratch address / remaining length */ | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_tmp_lo v5 | ||||||
|  | #define v_tmp_hi v6 | ||||||
|  | #define v_gft1_lo v7 | ||||||
|  | #define v_gft1_hi v8 | ||||||
|  | #define v_gft2_lo v9 | ||||||
|  | #define v_gft2_hi v10 | ||||||
|  | #define v_gft3_lo v11 | ||||||
|  | #define v_gft3_hi v12 | ||||||
|  | #define v_gft4_lo v13 | ||||||
|  | #define v_gft4_hi v14 | ||||||
|  | #define v_gft5_lo v15 | ||||||
|  | #define v_gft5_hi v16 | ||||||
|  | #define v_gft6_lo v17 | ||||||
|  | #define v_gft6_hi v18 | ||||||
|  | #define v_dest2 v19 | ||||||
|  | #define v_dest3 v20 | ||||||
|  | #define v_dest4 v21 | ||||||
|  | #define v_dest5 v22 | ||||||
|  | #define v_dest6 v23 | ||||||
|  |  | ||||||
|  | gf_6vect_mad_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, GF_MIN_LEN | ||||||
|  |     blt x_len, x_tmp, .return_fail | ||||||
|  |  | ||||||
|  |     /* tables are loaded with vl = 16: each half is exactly 16 bytes */ | ||||||
|  |     vsetivli zero, GF_TBL_HALF, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* x_tbl = gftbls + vec_i * 32; consecutive tables are vec * 32 apart */ | ||||||
|  |     slli x_tmp, x_vec_i, 5 | ||||||
|  |     add x_tbl, x_tbl, x_tmp | ||||||
|  |     slli x_tbl_stride, x_vec, 5 | ||||||
|  |  | ||||||
|  |     /* Load table 1 */ | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft1_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 2 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_stride | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft2_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 3 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_stride | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft3_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 4 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_stride | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft4_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 5 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_stride | ||||||
|  |     vle8.v v_gft5_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft5_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load table 6 */ | ||||||
|  |     add x_tbl, x_tbl, x_tbl_stride | ||||||
|  |     vle8.v v_gft6_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, GF_TBL_HALF | ||||||
|  |     vle8.v v_gft6_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Load destination pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |     ld x_dest5, 32(x_dest) | ||||||
|  |     ld x_dest6, 40(x_dest) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | /* one strip of x_vl bytes per iteration */ | ||||||
|  | .Llooprvv_vl: | ||||||
|  |     bge x_pos, x_len, .return_pass | ||||||
|  |  | ||||||
|  |     /* x_vl = min(len - pos, VLMAX): the last strip may be shorter */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Load source data */ | ||||||
|  |     add x_tmp, x_src, x_pos | ||||||
|  |     vle8.v v_src, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* load dest data (all six are read before any is written back) */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vle8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vle8.v v_dest2, (x_tmp) | ||||||
|  |     add x_tmp, x_dest3, x_pos | ||||||
|  |     vle8.v v_dest3, (x_tmp) | ||||||
|  |     add x_tmp, x_dest4, x_pos | ||||||
|  |     vle8.v v_dest4, (x_tmp) | ||||||
|  |     add x_tmp, x_dest5, x_pos | ||||||
|  |     vle8.v v_dest5, (x_tmp) | ||||||
|  |     add x_tmp, x_dest6, x_pos | ||||||
|  |     vle8.v v_dest6, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* dest1 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_tmp_lo, v_dest1 | ||||||
|  |     vxor.vv v_dest1, v_tmp_hi, v_dest1 | ||||||
|  |  | ||||||
|  |     /* dest2 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_tmp_lo, v_dest2 | ||||||
|  |     vxor.vv v_dest2, v_tmp_hi, v_dest2 | ||||||
|  |  | ||||||
|  |     /* dest3 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_tmp_lo, v_dest3 | ||||||
|  |     vxor.vv v_dest3, v_tmp_hi, v_dest3 | ||||||
|  |  | ||||||
|  |     /* dest4 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_tmp_lo, v_dest4 | ||||||
|  |     vxor.vv v_dest4, v_tmp_hi, v_dest4 | ||||||
|  |  | ||||||
|  |     /* dest5 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft5_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft5_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest5, v_tmp_lo, v_dest5 | ||||||
|  |     vxor.vv v_dest5, v_tmp_hi, v_dest5 | ||||||
|  |  | ||||||
|  |     /* dest6 */ | ||||||
|  |     vrgather.vv v_tmp_lo, v_gft6_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp_hi, v_gft6_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest6, v_tmp_lo, v_dest6 | ||||||
|  |     vxor.vv v_dest6, v_tmp_hi, v_dest6 | ||||||
|  |  | ||||||
|  |     /* Store destination data (x_vl bytes each) */ | ||||||
|  |     add x_tmp, x_dest1, x_pos | ||||||
|  |     vse8.v v_dest1, (x_tmp) | ||||||
|  |     add x_tmp, x_dest2, x_pos | ||||||
|  |     vse8.v v_dest2, (x_tmp) | ||||||
|  |     add x_tmp, x_dest3, x_pos | ||||||
|  |     vse8.v v_dest3, (x_tmp) | ||||||
|  |     add x_tmp, x_dest4, x_pos | ||||||
|  |     vse8.v v_dest4, (x_tmp) | ||||||
|  |     add x_tmp, x_dest5, x_pos | ||||||
|  |     vse8.v v_dest5, (x_tmp) | ||||||
|  |     add x_tmp, x_dest6, x_pos | ||||||
|  |     vse8.v v_dest6, (x_tmp) | ||||||
|  |  | ||||||
|  |     /* Increment position by the bytes just processed */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |  | ||||||
|  |     j .Llooprvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     li w_ret, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li w_ret, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										299
									
								
								erasure_code/riscv64/gf_7vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										299
									
								
								erasure_code/riscv64/gf_7vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,299 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_7vect_dot_prod_rvv | ||||||
|  | .type gf_7vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | /* void gf_7vect_dot_prod_rvv(int len, int vlen, unsigned char *gftbls, | ||||||
|  |                               unsigned char **src, unsigned char **dest); | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0  /* vector length */ | ||||||
|  | #define x_vec a1  /* number of source vectors (ie. data blocks) */ | ||||||
|  | #define x_tbl a2 | ||||||
|  | #define x_src a3 | ||||||
|  | #define x_dest a4 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_vec_i t1 | ||||||
|  | #define x_ptr t2 | ||||||
|  | #define x_pos t3 | ||||||
|  |  | ||||||
|  | #define x_tbl1 t4 | ||||||
|  | #define x_tbl2 t5 | ||||||
|  | #define x_tbl3 t6 | ||||||
|  | #define x_tbl4 s8 | ||||||
|  | #define x_tbl5 a6 | ||||||
|  |  | ||||||
|  | #define x_tbl6 a7 | ||||||
|  | #define x_tbl7 s0 | ||||||
|  | #define x_dest1 s1 | ||||||
|  | #define x_dest2 s2 | ||||||
|  | #define x_dest3 s3 | ||||||
|  | #define x_dest4 s4 | ||||||
|  | #define x_dest5 s5 | ||||||
|  | #define x_dest6 s6 | ||||||
|  | #define x_dest7 s7 | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  |  | ||||||
|  | #define v_dest1 v4 | ||||||
|  | #define v_dest2 v5   /* destination 2 */ | ||||||
|  | #define v_dest3 v6   /* destination 3 */ | ||||||
|  | #define v_dest4 v7   /* destination 4 */ | ||||||
|  | #define v_dest5 v8   /* destination 5 */ | ||||||
|  | #define v_dest6 v9   /* destination 6 */ | ||||||
|  | #define v_dest7 v10  /* destination 7 */ | ||||||
|  |  | ||||||
|  | #define v_gft1_lo v11 | ||||||
|  | #define v_gft1_hi v12 | ||||||
|  | #define v_gft2_lo v13 /* GF table 2 low */ | ||||||
|  | #define v_gft2_hi v14 /* GF table 2 high */ | ||||||
|  | #define v_gft3_lo v15 /* GF table 3 low */ | ||||||
|  | #define v_gft3_hi v16 /* GF table 3 high */ | ||||||
|  | #define v_gft4_lo v17 /* GF table 4 low */ | ||||||
|  | #define v_gft4_hi v18 /* GF table 4 high */ | ||||||
|  | #define v_gft5_lo v19 /* GF table 5 low */ | ||||||
|  | #define v_gft5_hi v20 /* GF table 5 high */ | ||||||
|  | #define v_gft6_lo v21 /* GF table 6 low */ | ||||||
|  | #define v_gft6_hi v22 /* GF table 6 high */ | ||||||
|  | #define v_gft7_lo v23 | ||||||
|  | #define v_gft7_hi v24 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | gf_7vect_dot_prod_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li t0, 16 | ||||||
|  |     blt x_len, t0, .return_fail | ||||||
|  |  | ||||||
|  |     /* save callee-saved registers */ | ||||||
|  |     addi sp, sp, -80 | ||||||
|  |     sd s0, 0(sp) | ||||||
|  |     sd s1, 8(sp) | ||||||
|  |     sd s2, 16(sp) | ||||||
|  |     sd s3, 24(sp) | ||||||
|  |     sd s4, 32(sp) | ||||||
|  |     sd s5, 40(sp) | ||||||
|  |     sd s6, 48(sp) | ||||||
|  |     sd s7, 56(sp) | ||||||
|  |     sd s8, 64(sp) | ||||||
|  |  | ||||||
|  |     vsetvli t0, x0, e8, m1 | ||||||
|  |  | ||||||
|  |     /* initialize position */ | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  |     /* load destination pointers */ | ||||||
|  |     ld x_dest1, 0(x_dest) | ||||||
|  |     ld x_dest2, 8(x_dest) | ||||||
|  |     ld x_dest3, 16(x_dest) | ||||||
|  |     ld x_dest4, 24(x_dest) | ||||||
|  |     ld x_dest5, 32(x_dest) | ||||||
|  |     ld x_dest6, 40(x_dest) | ||||||
|  |     ld x_dest7, 48(x_dest) | ||||||
|  |  | ||||||
|  | /* Loop 1: x_len, vector length */ | ||||||
|  | .Llooprvv_vl: | ||||||
|  |     /* check if we have processed all elements */ | ||||||
|  |     bge x_pos, x_len, .return_pass | ||||||
|  |  | ||||||
|  |     /* initialize vector loop counter */ | ||||||
|  |     li x_vec_i, 0 | ||||||
|  |  | ||||||
|  |     /* load source pointer */ | ||||||
|  |     ld x_ptr, 0(x_src) | ||||||
|  |  | ||||||
|  |     /* clear destination vectors */ | ||||||
|  |     vmv.v.i v_dest1, 0 | ||||||
|  |     vmv.v.i v_dest2, 0 | ||||||
|  |     vmv.v.i v_dest3, 0 | ||||||
|  |     vmv.v.i v_dest4, 0 | ||||||
|  |     vmv.v.i v_dest5, 0 | ||||||
|  |     vmv.v.i v_dest6, 0 | ||||||
|  |     vmv.v.i v_dest7, 0 | ||||||
|  |  | ||||||
|  |     /* reset table pointers */ | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     mv x_tbl1, x_tbl | ||||||
|  |     slli a5, x_vec, 5 | ||||||
|  |     add x_tbl2, x_tbl1, a5 | ||||||
|  |     add x_tbl3, x_tbl2, a5 | ||||||
|  |     add x_tbl4, x_tbl3, a5 | ||||||
|  |     add x_tbl5, x_tbl4, a5 | ||||||
|  |     add x_tbl6, x_tbl5, a5 | ||||||
|  |     add x_tbl7, x_tbl6, a5 | ||||||
|  |  | ||||||
|  | .Llooprvv_vl_vects: | ||||||
|  |     /* load source data */ | ||||||
|  |     slli a5, x_vec_i, 3 | ||||||
|  |     add a5,x_src,a5 | ||||||
|  |     ld x_ptr, 0(a5) | ||||||
|  |     add x_ptr,x_ptr,x_pos | ||||||
|  |  | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* load gf_table's */ | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tbl1) | ||||||
|  |     addi x_tbl1, x_tbl1, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft2_lo, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |     vle8.v v_gft2_hi, (x_tbl2) | ||||||
|  |     addi x_tbl2, x_tbl2, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft3_lo, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |     vle8.v v_gft3_hi, (x_tbl3) | ||||||
|  |     addi x_tbl3, x_tbl3, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft4_lo, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, 16 | ||||||
|  |     vle8.v v_gft4_hi, (x_tbl4) | ||||||
|  |     addi x_tbl4, x_tbl4, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft5_lo, (x_tbl5) | ||||||
|  |     addi x_tbl5, x_tbl5, 16 | ||||||
|  |     vle8.v v_gft5_hi, (x_tbl5) | ||||||
|  |     addi x_tbl5, x_tbl5, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft6_lo, (x_tbl6) | ||||||
|  |     addi x_tbl6, x_tbl6, 16 | ||||||
|  |     vle8.v v_gft6_hi, (x_tbl6) | ||||||
|  |     addi x_tbl6, x_tbl6, 16 | ||||||
|  |  | ||||||
|  |     vle8.v v_gft7_lo, (x_tbl7) | ||||||
|  |     addi x_tbl7, x_tbl7, 16 | ||||||
|  |     vle8.v v_gft7_hi, (x_tbl7) | ||||||
|  |     addi x_tbl7, x_tbl7, 16 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     /* dest 1 */ | ||||||
|  |     vrgather.vv v26, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft1_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest1, v_dest1, v26 | ||||||
|  |     vxor.vv v_dest1, v_dest1, v27 | ||||||
|  |  | ||||||
|  |     /* dest 2 */ | ||||||
|  |     vrgather.vv v26, v_gft2_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft2_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest2, v_dest2, v26 | ||||||
|  |     vxor.vv v_dest2, v_dest2, v27 | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest3 */ | ||||||
|  |     vrgather.vv v26, v_gft3_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft3_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest3, v_dest3, v26 | ||||||
|  |     vxor.vv v_dest3, v_dest3, v27 | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest4 */ | ||||||
|  |     vrgather.vv v26, v_gft4_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft4_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest4, v_dest4, v26 | ||||||
|  |     vxor.vv v_dest4, v_dest4, v27 | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest5 */ | ||||||
|  |     vrgather.vv v26, v_gft5_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft5_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest5, v_dest5, v26 | ||||||
|  |     vxor.vv v_dest5, v_dest5, v27 | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest6 */ | ||||||
|  |     vrgather.vv v26, v_gft6_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft6_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest6, v_dest6, v26 | ||||||
|  |     vxor.vv v_dest6, v_dest6, v27 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     /* GF multiplication and accumulation for dest7 */ | ||||||
|  |     vrgather.vv v26, v_gft7_lo, v_src_lo | ||||||
|  |     vrgather.vv v27, v_gft7_hi, v_src_hi | ||||||
|  |     vxor.vv v_dest7, v_dest7, v26 | ||||||
|  |     vxor.vv v_dest7, v_dest7, v27 | ||||||
|  |  | ||||||
|  |     /* increment x_vec_i */ | ||||||
|  |     addi x_vec_i, x_vec_i, 1 | ||||||
|  |     blt x_vec_i, x_vec, .Llooprvv_vl_vects | ||||||
|  |  | ||||||
|  |     /* Store results to destination */ | ||||||
|  |     vse8.v v_dest1, (x_dest1) | ||||||
|  |     vse8.v v_dest2, (x_dest2) | ||||||
|  |     vse8.v v_dest3, (x_dest3) | ||||||
|  |     vse8.v v_dest4, (x_dest4) | ||||||
|  |     vse8.v v_dest5, (x_dest5) | ||||||
|  |     vse8.v v_dest6, (x_dest6) | ||||||
|  |     vse8.v v_dest7, (x_dest7) | ||||||
|  |  | ||||||
|  |     add x_dest1,x_dest1, t0 | ||||||
|  |     add x_dest2,x_dest2, t0 | ||||||
|  |     add x_dest3,x_dest3, t0 | ||||||
|  |     add x_dest4,x_dest4, t0 | ||||||
|  |     add x_dest5,x_dest5, t0 | ||||||
|  |     add x_dest6,x_dest6, t0 | ||||||
|  |     add x_dest7,x_dest7, t0 | ||||||
|  |  | ||||||
|  |     /* increment one vector length */ | ||||||
|  |     add x_pos, x_pos, t0 | ||||||
|  |     j .Llooprvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     /* Restore callee-saved registers */ | ||||||
|  |     ld s0, 0(sp) | ||||||
|  |     ld s1, 8(sp) | ||||||
|  |     ld s2, 16(sp) | ||||||
|  |     ld s3, 24(sp) | ||||||
|  |     ld s4, 32(sp) | ||||||
|  |     ld s5, 40(sp) | ||||||
|  |     ld s6, 48(sp) | ||||||
|  |     ld s7, 56(sp) | ||||||
|  |     ld s8, 64(sp) | ||||||
|  |     addi sp, sp, 80 | ||||||
|  |  | ||||||
|  |     /* Return success */ | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li a0, 1  # return fail | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										136
									
								
								erasure_code/riscv64/gf_vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								erasure_code/riscv64/gf_vect_dot_prod_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # RISC-V RVV implementation of gf_vect_dot_prod_rvv | ||||||
|  |  | ||||||
|  | # Function: gf_vect_dot_prod_rvv | ||||||
|  | #   GF(2^8) dot product: dest[i] = XOR over j of gftbl_j(src[j][i]). | ||||||
|  | #   Each source byte is split into two nibbles; every nibble indexes a | ||||||
|  | #   16-byte lookup table and the two lookups are XORed together. | ||||||
|  | # | ||||||
|  | # Arguments: | ||||||
|  | #   a0: len (number of bytes in every source vector and in dest) | ||||||
|  | #   a1: vlen (number of source vectors; the inner loop is do-while, so | ||||||
|  | #       at least one source vector is always read) | ||||||
|  | #   a2: gftbls (32 bytes per source vector: 16-byte low-nibble table | ||||||
|  | #       followed by 16-byte high-nibble table) | ||||||
|  | #   a3: src (pointer to array of source vector pointers) | ||||||
|  | #   a4: dest (pointer to destination vector) | ||||||
|  | # | ||||||
|  | # Returns: | ||||||
|  | #   a0: 0 on success, 1 when len < 16 (nothing is written in that case) | ||||||
|  | # | ||||||
|  | # Scalar registers: | ||||||
|  | #   t0: vec_i (byte offset into the src pointer array, steps by 8) | ||||||
|  | #   t1: ptr (address of the current chunk in the current source) | ||||||
|  | #   t2: pos (bytes already produced) | ||||||
|  | #   t3: tbl (pointer to the lookup tables of the current source) | ||||||
|  | #   t4: constant 16 (size of one nibble lookup table) | ||||||
|  | #   t5: vl (bytes handled in the current outer iteration) | ||||||
|  | #   t6: scratch (remaining length / src array slot address) | ||||||
|  | # | ||||||
|  | # Vector registers: | ||||||
|  | #   v1: source data | ||||||
|  | #   v2: low nibbles of the source data | ||||||
|  | #   v3: high nibbles of the source data | ||||||
|  | #   v4: accumulator for dest | ||||||
|  | #   v5: lookup table for low nibbles | ||||||
|  | #   v6: lookup table for high nibbles | ||||||
|  | #   v8: products selected by the low nibbles | ||||||
|  | #   v9: products selected by the high nibbles | ||||||
|  | # | ||||||
|  | # NOTE(review): assumes VLEN >= 128 so one m1 register holds a full | ||||||
|  | # 16-entry table; this matches the rv64gcv baseline. | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_vect_dot_prod_rvv | ||||||
|  | .type gf_vect_dot_prod_rvv, @function | ||||||
|  |  | ||||||
|  | gf_vect_dot_prod_rvv: | ||||||
|  |     # Reject len < 16; t4 keeps the value 16 for the table loads below | ||||||
|  |     li t4, 16 | ||||||
|  |     blt a0, t4, .return_fail | ||||||
|  |  | ||||||
|  |     # pos = 0 | ||||||
|  |     li t2, 0 | ||||||
|  |  | ||||||
|  |     # Turn vlen into a byte count over the pointer array (8 bytes each) | ||||||
|  |     slli a1, a1, 3 | ||||||
|  |  | ||||||
|  | .Llooprvv_vl: | ||||||
|  |     # Done once pos >= len | ||||||
|  |     bge t2, a0, .return_pass | ||||||
|  |  | ||||||
|  |     # Strip-mining: vl = min(len - pos, VLMAX), so the last pass never | ||||||
|  |     # reads or writes beyond len bytes | ||||||
|  |     sub t6, a0, t2 | ||||||
|  |     vsetvli t5, t6, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     # Clear the accumulator for this chunk | ||||||
|  |     vmv.v.i v4, 0 | ||||||
|  |  | ||||||
|  |     # vec_i = 0, tbl = gftbls | ||||||
|  |     li t0, 0 | ||||||
|  |     mv t3, a2 | ||||||
|  |  | ||||||
|  | .Llooprvv_vl_vects: | ||||||
|  |     # Tables are exactly 16 bytes each: load them with vl = 16 so no | ||||||
|  |     # byte outside gftbls is touched on wide-vector hardware, and so | ||||||
|  |     # that all 16 entries are present even when the data vl is < 16 | ||||||
|  |     vsetvli zero, t4, e8, m1, ta, ma | ||||||
|  |     vle8.v v5, (t3)           # low-nibble table | ||||||
|  |     addi t3, t3, 16 | ||||||
|  |     vle8.v v6, (t3)           # high-nibble table | ||||||
|  |     addi t3, t3, 16           # tbl now points at the next source tables | ||||||
|  |  | ||||||
|  |     # ptr = src[vec_i] + pos | ||||||
|  |     add t6, a3, t0 | ||||||
|  |     ld t1, 0(t6) | ||||||
|  |     add t1, t1, t2 | ||||||
|  |  | ||||||
|  |     # Advance vec_i to the next pointer slot | ||||||
|  |     addi t0, t0, 8 | ||||||
|  |  | ||||||
|  |     # Back to the data vl (t5 <= VLMAX, so vl becomes exactly t5) | ||||||
|  |     vsetvli zero, t5, e8, m1, ta, ma | ||||||
|  |     vle8.v v1, (t1) | ||||||
|  |  | ||||||
|  |     # Split every source byte into its two nibbles | ||||||
|  |     vand.vi v2, v1, 0x0F | ||||||
|  |     vsrl.vi v3, v1, 4 | ||||||
|  |  | ||||||
|  |     # GF multiplication by table lookup; indices are always 0..15 | ||||||
|  |     vrgather.vv v8, v5, v2 | ||||||
|  |     vrgather.vv v9, v6, v3 | ||||||
|  |  | ||||||
|  |     # GF addition (XOR) into the accumulator | ||||||
|  |     vxor.vv v4, v4, v8 | ||||||
|  |     vxor.vv v4, v4, v9 | ||||||
|  |  | ||||||
|  |     # Next source vector while vec_i < vlen * 8 | ||||||
|  |     blt t0, a1, .Llooprvv_vl_vects | ||||||
|  |  | ||||||
|  |     # Store vl result bytes and advance dest and pos by vl | ||||||
|  |     vse8.v v4, (a4) | ||||||
|  |     add a4, a4, t5 | ||||||
|  |     add t2, t2, t5 | ||||||
|  |  | ||||||
|  |     j .Llooprvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     li a0, 0                  # Return 0 (success) | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li a0, 1                  # Return 1 (failure) | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										119
									
								
								erasure_code/riscv64/gf_vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								erasure_code/riscv64/gf_vect_mad_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_vect_mad_rvv | ||||||
|  | .type gf_vect_mad_rvv, @function | ||||||
|  |  | ||||||
|  | /* gf_vect_mad_rvv(int len, int vec, int vec_i, unsigned char *gftbls, | ||||||
|  |                    unsigned char *src, unsigned char *dest); | ||||||
|  |  | ||||||
|  |    GF(2^8) multiply-accumulate: dest[i] ^= gftbl(src[i]) for i in [0, len), | ||||||
|  |    using the 32-byte table pair at gftbls + vec_i * 32 (16-byte low-nibble | ||||||
|  |    table followed by 16-byte high-nibble table). The vec argument (a1) is | ||||||
|  |    not read. Returns 0 on success, 1 when len < 16 (dest is left untouched). | ||||||
|  |  | ||||||
|  |    NOTE(review): assumes VLEN >= 128 so one m1 register holds a full | ||||||
|  |    16-entry table; this matches the rv64gcv baseline. | ||||||
|  |  */ | ||||||
|  | /* arguments */ | ||||||
|  | #define x_len a0 | ||||||
|  | #define x_vec_i a2 | ||||||
|  | #define x_tbl a3 | ||||||
|  | #define x_src a4 | ||||||
|  | #define x_dest a5 | ||||||
|  |  | ||||||
|  | /* returns */ | ||||||
|  | #define w_ret a0 | ||||||
|  |  | ||||||
|  | /* local variables */ | ||||||
|  | #define x_pos t0 | ||||||
|  | #define x_tmp t1 | ||||||
|  | #define x_vl t2 | ||||||
|  |  | ||||||
|  | /* vectors */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest v4 | ||||||
|  | #define v_tmp1_lo v5 | ||||||
|  | #define v_tmp1_hi v6 | ||||||
|  | #define v_gft1_lo v7 | ||||||
|  | #define v_gft1_hi v8 | ||||||
|  |  | ||||||
|  | gf_vect_mad_rvv: | ||||||
|  |     /* less than 16 bytes, return_fail */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     blt x_len, x_tmp, .return_fail | ||||||
|  |  | ||||||
|  |     /* each lookup table is exactly 16 bytes: load with vl = 16 so that | ||||||
|  |        wide-vector hardware does not read past the end of gftbls */ | ||||||
|  |     vsetvli zero, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* x_tbl += x_vec_i * 2^5 */ | ||||||
|  |     slli x_tmp, x_vec_i, 5 | ||||||
|  |     add x_tbl, x_tbl, x_tmp | ||||||
|  |  | ||||||
|  |     /* Load gft1_lo and gft1_hi */ | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi x_tmp, x_tbl, 16 | ||||||
|  |     vle8.v v_gft1_hi, (x_tmp) | ||||||
|  |  | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | .Lloop_rvv_vl: | ||||||
|  |     /* strip-mining: vl = min(len - pos, VLMAX); len >= 16 at entry and the | ||||||
|  |        loop only repeats while pos < len, so the request is always > 0. | ||||||
|  |        This keeps the final pass from touching bytes beyond len. */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* load src data */ | ||||||
|  |     vle8.v v_src, (x_src) | ||||||
|  |  | ||||||
|  |     /* split 4-bit lo; 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* load dest data */ | ||||||
|  |     vle8.v v_dest, (x_dest) | ||||||
|  |  | ||||||
|  |     /* table indexing, ie. gf(2^8) multiplication */ | ||||||
|  |     /* RISC-V RVV does not have tbl instruction, use vrgather.vv; | ||||||
|  |        indices are always 0..15, so only the 16 loaded entries are read */ | ||||||
|  |     vrgather.vv v_tmp1_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp1_hi, v_gft1_hi, v_src_hi | ||||||
|  |  | ||||||
|  |     /* exclusive or, ie. gf(2^8) add */ | ||||||
|  |     vxor.vv v_dest, v_tmp1_lo, v_dest | ||||||
|  |     vxor.vv v_dest, v_tmp1_hi, v_dest | ||||||
|  |  | ||||||
|  |     /* store dest data */ | ||||||
|  |     vse8.v v_dest, (x_dest) | ||||||
|  |  | ||||||
|  |     /* advance by the number of bytes actually processed */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     add x_src, x_src, x_vl | ||||||
|  |     add x_dest, x_dest, x_vl | ||||||
|  |  | ||||||
|  |     blt x_pos, x_len, .Lloop_rvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     li w_ret, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li w_ret, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										114
									
								
								erasure_code/riscv64/gf_vect_mul_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								erasure_code/riscv64/gf_vect_mul_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | |||||||
|  | ################################################################## | ||||||
|  | #  Copyright (c) 2025 sanechips Technologies Co., Ltd. | ||||||
|  | # | ||||||
|  | #  Redistribution and use in source and binary forms, with or without | ||||||
|  | #  modification, are permitted provided that the following conditions | ||||||
|  | #  are met: | ||||||
|  | #    * Redistributions of source code must retain the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer. | ||||||
|  | #    * Redistributions in binary form must reproduce the above copyright | ||||||
|  | #      notice, this list of conditions and the following disclaimer in | ||||||
|  | #      the documentation and/or other materials provided with the | ||||||
|  | #      distribution. | ||||||
|  | #    * Neither the name of sanechips Corporation nor the names of its | ||||||
|  | #      contributors may be used to endorse or promote products derived | ||||||
|  | #      from this software without specific prior written permission. | ||||||
|  | # | ||||||
|  | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||||
|  | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||||
|  | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||||
|  | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||||
|  | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||||
|  | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||||
|  | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||||
|  | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||||
|  | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||||
|  | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  | ######################################################################## | ||||||
|  |  | ||||||
|  | #if HAVE_RVV | ||||||
|  | .text | ||||||
|  | .align 2 | ||||||
|  |  | ||||||
|  | .global gf_vect_mul_rvv | ||||||
|  | .type gf_vect_mul_rvv, @function | ||||||
|  |  | ||||||
|  | /* GF(2^8) multiply of a byte vector by a constant: | ||||||
|  |  *   dest[i] = gftbl_lo[src[i] & 0x0F] ^ gftbl_hi[src[i] >> 4] | ||||||
|  |  * | ||||||
|  |  * Function arguments: | ||||||
|  |  *   a0: len    - Length of vector in bytes, must be a multiple of 32. | ||||||
|  |  *   a1: gftbl  - Pointer to 32-byte array of pre-calculated constants | ||||||
|  |  *                (16-byte low-nibble table, then 16-byte high-nibble table). | ||||||
|  |  *   a2: src    - Pointer to source data array. | ||||||
|  |  *   a3: dest   - Pointer to destination data array. | ||||||
|  |  * Returns: | ||||||
|  |  *   a0: 0 for success (including len <= 0, where nothing is read or | ||||||
|  |  *       written), 1 when len is not a multiple of 32. | ||||||
|  |  * | ||||||
|  |  * NOTE(review): assumes VLEN >= 128 so one m1 register holds a full | ||||||
|  |  * 16-entry table; this matches the rv64gcv baseline. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /* Local variables */ | ||||||
|  | #define x_pos t0 | ||||||
|  | #define x_tmp t1 | ||||||
|  | #define x_ptr t2 | ||||||
|  | #define x_vl t6 | ||||||
|  | #define x_len a0 | ||||||
|  | #define x_tbl a1 | ||||||
|  | #define x_src a2 | ||||||
|  | #define x_dest a3 | ||||||
|  |  | ||||||
|  | /* Vector registers */ | ||||||
|  | #define v_src v1 | ||||||
|  | #define v_src_lo v2 | ||||||
|  | #define v_src_hi v3 | ||||||
|  | #define v_dest v4 | ||||||
|  | #define v_tmp1_lo v5 | ||||||
|  | #define v_tmp1_hi v6 | ||||||
|  | #define v_gft1_lo v7 | ||||||
|  | #define v_gft1_hi v8 | ||||||
|  |  | ||||||
|  | gf_vect_mul_rvv: | ||||||
|  |     /* len must be a multiple of 32 bytes */ | ||||||
|  |     andi x_tmp, x_len, 0x1F | ||||||
|  |     bnez x_tmp, .return_fail | ||||||
|  |  | ||||||
|  |     /* Nothing to do for an empty vector: the loop below is do-while and | ||||||
|  |        would otherwise read and write one full vector */ | ||||||
|  |     blez x_len, .return_pass | ||||||
|  |  | ||||||
|  |     /* Each lookup table is exactly 16 bytes: load with vl = 16 so that | ||||||
|  |        wide-vector hardware does not read past the 32-byte gftbl */ | ||||||
|  |     li x_tmp, 16 | ||||||
|  |     vsetvli zero, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Load pre-calculated constants into v_gft1_lo and v_gft1_hi */ | ||||||
|  |     vle8.v v_gft1_lo, (x_tbl) | ||||||
|  |     addi t3, x_tbl, 16 | ||||||
|  |     vle8.v v_gft1_hi, (t3) | ||||||
|  |  | ||||||
|  |     /* Initialize position counter */ | ||||||
|  |     li x_pos, 0 | ||||||
|  |  | ||||||
|  | .Llooprvv_vl: | ||||||
|  |     /* Strip-mining: vl = min(len - pos, VLMAX), so a final chunk shorter | ||||||
|  |        than one vector register stays inside src and dest */ | ||||||
|  |     sub x_tmp, x_len, x_pos | ||||||
|  |     vsetvli x_vl, x_tmp, e8, m1, ta, ma | ||||||
|  |  | ||||||
|  |     /* Load source data into v_src */ | ||||||
|  |     add x_ptr, x_src, x_pos | ||||||
|  |     vle8.v v_src, (x_ptr) | ||||||
|  |  | ||||||
|  |     /* Split 4-bit lo and 4-bit hi */ | ||||||
|  |     vand.vi v_src_lo, v_src, 0x0F | ||||||
|  |     vsrl.vi v_src_hi, v_src, 4 | ||||||
|  |  | ||||||
|  |     /* Table lookup (GF multiplication); indices are always 0..15 */ | ||||||
|  |     vrgather.vv v_tmp1_lo, v_gft1_lo, v_src_lo | ||||||
|  |     vrgather.vv v_tmp1_hi, v_gft1_hi, v_src_hi | ||||||
|  |  | ||||||
|  |     /* XOR (GF addition) */ | ||||||
|  |     vxor.vv v_dest, v_tmp1_hi, v_tmp1_lo | ||||||
|  |  | ||||||
|  |     /* Store result to destination */ | ||||||
|  |     vse8.v v_dest, (x_dest) | ||||||
|  |  | ||||||
|  |     /* Advance by the number of bytes actually processed */ | ||||||
|  |     add x_pos, x_pos, x_vl | ||||||
|  |     add x_dest, x_dest, x_vl | ||||||
|  |  | ||||||
|  |     /* Check if we have processed all bytes */ | ||||||
|  |     blt x_pos, x_len, .Llooprvv_vl | ||||||
|  |  | ||||||
|  | .return_pass: | ||||||
|  |     li a0, 0 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | .return_fail: | ||||||
|  |     li a0, 1 | ||||||
|  |     ret | ||||||
|  |  | ||||||
|  | #endif | ||||||
		Reference in New Issue
	
	Block a user
	 lvshuo
					lvshuo