Merge "Fix mips vpx_post_proc_down_and_across_mb_row_msa function"

This commit is contained in:
James Zern 2017-02-16 23:02:10 +00:00 committed by Gerrit Code Review
commit 0f014c97e5

View File

@@ -9,6 +9,7 @@
*/
#include <stdlib.h>
#include "./macros_msa.h"
extern const int16_t vpx_rv[];
@@ -295,6 +296,7 @@ static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
uint8_t *p_dst_st = dst_ptr;
uint8_t *f_orig = f;
uint16_t col;
uint64_t out0, out1, out2, out3;
v16u8 above2, above1, below2, below1;
v16u8 src, ref, ref_temp;
v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6;
@@ -346,6 +348,67 @@ static void postproc_down_across_luma_msa(uint8_t *src_ptr, uint8_t *dst_ptr,
f += 16;
}
if (0 != (cols / 16)) {
ref = LD_UB(f);
LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1);
src = LD_UB(p_src);
LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2);
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0);
above2 = LD_UB(p_src + 3 * src_stride);
VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1);
above1 = LD_UB(p_src + 4 * src_stride);
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2);
src = LD_UB(p_src + 5 * src_stride);
VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3);
below1 = LD_UB(p_src + 6 * src_stride);
VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4);
below2 = LD_UB(p_src + 7 * src_stride);
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5);
above2 = LD_UB(p_src + 8 * src_stride);
VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6);
above1 = LD_UB(p_src + 9 * src_stride);
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7);
src = LD_UB(p_src + 10 * src_stride);
VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8);
below1 = LD_UB(p_src + 11 * src_stride);
VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9);
below2 = LD_UB(p_src + 12 * src_stride);
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10);
above2 = LD_UB(p_src + 13 * src_stride);
VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11);
above1 = LD_UB(p_src + 14 * src_stride);
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12);
src = LD_UB(p_src + 15 * src_stride);
VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13);
below1 = LD_UB(p_src + 16 * src_stride);
VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14);
below2 = LD_UB(p_src + 17 * src_stride);
VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15);
out0 = __msa_copy_u_d((v2i64)inter0, 0);
out1 = __msa_copy_u_d((v2i64)inter1, 0);
out2 = __msa_copy_u_d((v2i64)inter2, 0);
out3 = __msa_copy_u_d((v2i64)inter3, 0);
SD4(out0, out1, out2, out3, p_dst, dst_stride);
out0 = __msa_copy_u_d((v2i64)inter4, 0);
out1 = __msa_copy_u_d((v2i64)inter5, 0);
out2 = __msa_copy_u_d((v2i64)inter6, 0);
out3 = __msa_copy_u_d((v2i64)inter7, 0);
SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride);
out0 = __msa_copy_u_d((v2i64)inter8, 0);
out1 = __msa_copy_u_d((v2i64)inter9, 0);
out2 = __msa_copy_u_d((v2i64)inter10, 0);
out3 = __msa_copy_u_d((v2i64)inter11, 0);
SD4(out0, out1, out2, out3, p_dst + 8 * dst_stride, dst_stride);
out0 = __msa_copy_u_d((v2i64)inter12, 0);
out1 = __msa_copy_u_d((v2i64)inter13, 0);
out2 = __msa_copy_u_d((v2i64)inter14, 0);
out3 = __msa_copy_u_d((v2i64)inter15, 0);
SD4(out0, out1, out2, out3, p_dst + 12 * dst_stride, dst_stride);
}
f = f_orig;
p_dst = dst_ptr - 2;
LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5,