Fix MIPS vpx_post_proc_down_and_across_mb_row_msa function
Added a fix to handle the case where cols is not a multiple of 16, for size 16.

Change-Id: If3a6d772d112077c5e0a9be9e612e1148f04338c
This commit is contained in:
parent
b63e88e506
commit
fddf66b741
@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./macros_msa.h"
|
||||
|
||||
extern const int16_t vpx_rv[];
|
||||
@ -295,6 +296,7 @@ static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
|
||||
uint8_t *p_dst_st = dst_ptr;
|
||||
uint8_t *f_orig = f;
|
||||
uint16_t col;
|
||||
uint64_t out0, out1, out2, out3;
|
||||
v16u8 above2, above1, below2, below1;
|
||||
v16u8 src, ref, ref_temp;
|
||||
v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6;
|
||||
@ -346,6 +348,67 @@ static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
|
||||
f += 16;
|
||||
}
|
||||
|
||||
if (0 != (cols / 16)) {
|
||||
ref = LD_UB(f);
|
||||
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
|
||||
src = LD_UB(p_src);
|
||||
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
|
||||
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
|
||||
above2 = LD_UB(p_src + 3 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
|
||||
above1 = LD_UB(p_src + 4 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
|
||||
src = LD_UB(p_src + 5 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
|
||||
below1 = LD_UB(p_src + 6 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
|
||||
below2 = LD_UB(p_src + 7 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
|
||||
above2 = LD_UB(p_src + 8 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
|
||||
above1 = LD_UB(p_src + 9 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
|
||||
src = LD_UB(p_src + 10 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8);
|
||||
below1 = LD_UB(p_src + 11 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9);
|
||||
below2 = LD_UB(p_src + 12 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10);
|
||||
above2 = LD_UB(p_src + 13 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11);
|
||||
above1 = LD_UB(p_src + 14 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12);
|
||||
src = LD_UB(p_src + 15 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13);
|
||||
below1 = LD_UB(p_src + 16 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14);
|
||||
below2 = LD_UB(p_src + 17 * src_stride);
|
||||
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15);
|
||||
out0 = __msa_copy_u_d((v2i64)inter0, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter1, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter2, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter3, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst, dst_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)inter4, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter5, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter6, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter7, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)inter8, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter9, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter10, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter11, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst + 8 * dst_stride, dst_stride);
|
||||
|
||||
out0 = __msa_copy_u_d((v2i64)inter12, 0);
|
||||
out1 = __msa_copy_u_d((v2i64)inter13, 0);
|
||||
out2 = __msa_copy_u_d((v2i64)inter14, 0);
|
||||
out3 = __msa_copy_u_d((v2i64)inter15, 0);
|
||||
SD4(out0, out1, out2, out3, p_dst + 12 * dst_stride, dst_stride);
|
||||
}
|
||||
|
||||
f = f_orig;
|
||||
p_dst = dst_ptr - 2;
|
||||
LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5,
|
||||
|
Loading…
x
Reference in New Issue
Block a user