Merge "Move subtract functions from vp9 to vpx_dsp"
This commit is contained in:
commit
fcb5a8692a
@ -14,6 +14,7 @@
|
||||
#include "test/register_state_check.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
@ -89,15 +90,15 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
|
||||
::testing::Values(vp9_subtract_block_c));
|
||||
::testing::Values(vpx_subtract_block_c));
|
||||
|
||||
#if HAVE_SSE2 && CONFIG_USE_X86INC
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
|
||||
::testing::Values(vp9_subtract_block_sse2));
|
||||
::testing::Values(vpx_subtract_block_sse2));
|
||||
#endif
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest,
|
||||
::testing::Values(vp9_subtract_block_neon));
|
||||
::testing::Values(vpx_subtract_block_neon));
|
||||
#endif
|
||||
|
||||
} // namespace vp9
|
||||
|
@ -19,6 +19,8 @@
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "rdopt.h"
|
||||
|
||||
// TODO(jingning,johannkoenig): use vpx_subtract_block to replace
|
||||
// codec-specific vp8_subtract_ functions.
|
||||
void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
{
|
||||
unsigned char *src_ptr = (*(be->base_src) + be->src);
|
||||
|
@ -922,9 +922,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
# ENCODEMB INVOKE
|
||||
|
||||
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
|
||||
specialize qw/vp9_subtract_block neon msa/, "$sse2_x86inc";
|
||||
|
||||
#
|
||||
# Denoiser
|
||||
#
|
||||
@ -1328,9 +1325,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
|
||||
specialize qw/vp9_highbd_block_error sse2/;
|
||||
|
||||
add_proto qw/void vp9_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
|
||||
specialize qw/vp9_highbd_subtract_block/;
|
||||
|
||||
add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_highbd_quantize_fp/;
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@ -31,45 +32,6 @@ struct optimize_ctx {
|
||||
ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
|
||||
};
|
||||
|
||||
// Writes the per-sample residual (src - pred) of a rows x cols block of
// 8-bit samples into diff. After each row, every pointer is advanced by its
// own stride, so the three buffers may use different row pitches.
void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff, ptrdiff_t diff_stride,
                          const uint8_t *src, ptrdiff_t src_stride,
                          const uint8_t *pred, ptrdiff_t pred_stride) {
  int row = 0;

  while (row < rows) {
    int col = 0;

    // One row: the difference is computed in int and stored as int16_t.
    while (col < cols) {
      diff[col] = (int16_t)(src[col] - pred[col]);
      ++col;
    }

    // Step all three buffers to the start of the next row.
    src += src_stride;
    pred += pred_stride;
    diff += diff_stride;
    ++row;
  }
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth residual: src8 and pred8 are converted with
// CONVERT_TO_SHORTPTR and then read as uint16_t samples, so src_stride and
// pred_stride advance in 16-bit elements. Writes (src - pred) for a
// rows x cols block into diff. The bd argument is accepted but not used.
void vp9_highbd_subtract_block_c(int rows, int cols,
                                 int16_t *diff, ptrdiff_t diff_stride,
                                 const uint8_t *src8, ptrdiff_t src_stride,
                                 const uint8_t *pred8, ptrdiff_t pred_stride,
                                 int bd) {
  uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  int row = 0;
  (void) bd;

  while (row < rows) {
    int col = 0;

    while (col < cols) {
      diff[col] = (int16_t)(src_row[col] - pred_row[col]);
      ++col;
    }

    // Step all three buffers to the start of the next row.
    src_row += src_stride;
    pred_row += pred_stride;
    diff += diff_stride;
    ++row;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
|
||||
struct macroblock_plane *const p = &x->plane[plane];
|
||||
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
|
||||
@ -79,13 +41,13 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
|
||||
vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
|
||||
p->src.stride, pd->dst.buf, pd->dst.stride,
|
||||
x->e_mbd.bd);
|
||||
return;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
|
||||
vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
|
||||
pd->dst.buf, pd->dst.stride);
|
||||
}
|
||||
|
||||
@ -838,7 +800,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_highbd_subtract_block(32, 32, src_diff, diff_stride,
|
||||
vpx_highbd_subtract_block(32, 32, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
|
||||
@ -859,7 +821,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_highbd_subtract_block(16, 16, src_diff, diff_stride,
|
||||
vpx_highbd_subtract_block(16, 16, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
||||
@ -881,7 +843,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_highbd_subtract_block(8, 8, src_diff, diff_stride,
|
||||
vpx_highbd_subtract_block(8, 8, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
|
||||
@ -904,7 +866,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
dst, dst_stride, i, j, plane);
|
||||
|
||||
if (!x->skip_recode) {
|
||||
vp9_highbd_subtract_block(4, 4, src_diff, diff_stride,
|
||||
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
||||
@ -946,7 +908,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(32, 32, src_diff, diff_stride,
|
||||
vpx_subtract_block(32, 32, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride);
|
||||
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
||||
@ -966,7 +928,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(16, 16, src_diff, diff_stride,
|
||||
vpx_subtract_block(16, 16, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride);
|
||||
vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
||||
@ -986,7 +948,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(8, 8, src_diff, diff_stride,
|
||||
vpx_subtract_block(8, 8, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride);
|
||||
vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
|
||||
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
|
||||
@ -1007,7 +969,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
dst, dst_stride, i, j, plane);
|
||||
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(4, 4, src_diff, diff_stride,
|
||||
vpx_subtract_block(4, 4, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride);
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <math.h>
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@ -832,7 +833,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, idx, idy, 0);
|
||||
vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
|
||||
vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
|
||||
dst, dst_stride, xd->bd);
|
||||
if (xd->lossless) {
|
||||
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
|
||||
@ -932,7 +933,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, idx, idy, 0);
|
||||
vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
|
||||
vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
|
||||
|
||||
if (xd->lossless) {
|
||||
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
|
||||
@ -1394,16 +1395,16 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_highbd_subtract_block(
|
||||
vpx_highbd_subtract_block(
|
||||
height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
|
||||
8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
|
||||
} else {
|
||||
vp9_subtract_block(
|
||||
vpx_subtract_block(
|
||||
height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
|
||||
8, src, p->src.stride, dst, pd->dst.stride);
|
||||
}
|
||||
#else
|
||||
vp9_subtract_block(height, width,
|
||||
vpx_subtract_block(height, width,
|
||||
vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
|
||||
8, src, p->src.stride, dst, pd->dst.stride);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -114,7 +114,6 @@ endif
|
||||
ifeq ($(CONFIG_USE_X86INC),yes)
|
||||
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
|
||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
|
||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
|
||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
|
||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
|
||||
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
|
||||
@ -151,7 +150,6 @@ VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c
|
||||
endif
|
||||
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
|
||||
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c
|
||||
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_subtract_neon.c
|
||||
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c
|
||||
|
||||
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_avg_msa.c
|
||||
@ -161,7 +159,6 @@ VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c
|
||||
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c
|
||||
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct32x32_msa.c
|
||||
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h
|
||||
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_subtract_msa.c
|
||||
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_temporal_filter_msa.c
|
||||
|
||||
VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
|
||||
|
@ -9,12 +9,11 @@
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
void vp9_subtract_block_neon(int rows, int cols,
|
||||
void vpx_subtract_block_neon(int rows, int cols,
|
||||
int16_t *diff, ptrdiff_t diff_stride,
|
||||
const uint8_t *src, ptrdiff_t src_stride,
|
||||
const uint8_t *pred, ptrdiff_t pred_stride) {
|
@ -24,6 +24,9 @@
|
||||
#define LD_UH(...) LD_H(v8u16, __VA_ARGS__)
|
||||
#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)
|
||||
|
||||
/* Store the vector 'in' to the address 'pdst', which is reinterpreted as a
   pointer to RTYPE for the store. */
#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
|
||||
/* ST_H with RTYPE fixed to v8i16. */
#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)
|
||||
|
||||
#if (__mips_isa_rev >= 6)
|
||||
#define LW(psrc) ({ \
|
||||
const uint8_t *psrc_m = (const uint8_t *)(psrc); \
|
||||
@ -38,6 +41,61 @@
|
||||
\
|
||||
val_m; \
|
||||
})
|
||||
|
||||
#if (__mips == 64)
|
||||
/* LD variant selected when __mips_isa_rev >= 6 and __mips == 64.
   Loads a 64-bit value from 'psrc' with a single `ld` instruction issued
   through inline asm and yields it as the value of the GNU statement
   expression. The memory operand is the byte at 'psrc' ("m" (*psrc_m)). */
#define LD(psrc) ({ \
|
||||
const uint8_t *psrc_m = (const uint8_t *)(psrc); \
|
||||
uint64_t val_m = 0; \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
"ld %[val_m], %[psrc_m] \n\t" \
|
||||
\
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_m] "m" (*psrc_m) \
|
||||
); \
|
||||
\
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
/* LD variant selected when __mips_isa_rev >= 6 and __mips != 64.
   Builds a 64-bit value from two 32-bit LW loads: the word at 'psrc' becomes
   the low half and the word at 'psrc' + 4 becomes the high half. The result
   is the value of the GNU statement expression.

   The local pointer is deliberately named psrc_m1, not psrc_m: LW declares
   its own 'psrc_m', so handing it a variable of that name would expand to
   'psrc_m = (const uint8_t *)(psrc_m)' and initialise LW's pointer from
   itself. The pre-R6 32-bit LD uses the same psrc_m1 name. */
#define LD(psrc) ({                                        \
  const uint8_t *psrc_m1 = (const uint8_t *)(psrc);        \
  uint32_t val0_m, val1_m;                                 \
  uint64_t val_m = 0;                                      \
                                                           \
  val0_m = LW(psrc_m1);                                    \
  val1_m = LW(psrc_m1 + 4);                                \
                                                           \
  val_m = (uint64_t)(val1_m);                              \
  val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000);  \
  val_m = (uint64_t)(val_m | (uint64_t)val0_m);            \
                                                           \
  val_m;                                                   \
})
|
||||
#endif // (__mips == 64)
|
||||
|
||||
/* SW variant selected when __mips_isa_rev >= 6.
   Stores the 32-bit value 'val' to 'pdst' with a single `sw` instruction
   issued through inline asm. 'val' is evaluated once into val_m.
   NOTE: the locals are named pdst_m / val_m; callers that wrap this macro
   must not pass in variables with those names (see SD, which uses pdst_m1). */
#define SW(val, pdst) { \
|
||||
uint8_t *pdst_m = (uint8_t *)(pdst); \
|
||||
const uint32_t val_m = (val); \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
"sw %[val_m], %[pdst_m] \n\t" \
|
||||
\
|
||||
: [pdst_m] "=m" (*pdst_m) \
|
||||
: [val_m] "r" (val_m) \
|
||||
); \
|
||||
}
|
||||
|
||||
/* SD variant selected when __mips_isa_rev >= 6.
   Stores the 64-bit value 'val' to 'pdst' with a single `sd` instruction
   issued through inline asm. 'val' is evaluated once into val_m.
   NOTE(review): unlike LD in this branch, there is no (__mips == 64) split
   here, so `sd` is emitted for every R6 target - confirm that this is
   intended for 32-bit R6 builds. */
#define SD(val, pdst) { \
|
||||
uint8_t *pdst_m = (uint8_t *)(pdst); \
|
||||
const uint64_t val_m = (val); \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
"sd %[val_m], %[pdst_m] \n\t" \
|
||||
\
|
||||
: [pdst_m] "=m" (*pdst_m) \
|
||||
: [val_m] "r" (val_m) \
|
||||
); \
|
||||
}
|
||||
#else // !(__mips_isa_rev >= 6)
|
||||
#define LW(psrc) ({ \
|
||||
const uint8_t *psrc_m = (const uint8_t *)(psrc); \
|
||||
@ -52,6 +110,60 @@
|
||||
\
|
||||
val_m; \
|
||||
})
|
||||
|
||||
/* SW variant selected when __mips_isa_rev < 6.
   Stores the 32-bit value 'val' to 'pdst' through inline asm using the
   `usw` mnemonic (the R6 variant uses plain `sw`); presumably this is so
   that unaligned destinations are handled - confirm against the assembler
   documentation. 'val' is evaluated once into val_m.
   NOTE: the locals are named pdst_m / val_m; the pre-R6 SD macro wraps this
   one and therefore uses pdst_m1 for its own pointer. */
#define SW(val, pdst) { \
|
||||
uint8_t *pdst_m = (uint8_t *)(pdst); \
|
||||
const uint32_t val_m = (val); \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
"usw %[val_m], %[pdst_m] \n\t" \
|
||||
\
|
||||
: [pdst_m] "=m" (*pdst_m) \
|
||||
: [val_m] "r" (val_m) \
|
||||
); \
|
||||
}
|
||||
|
||||
#if (__mips == 64)
|
||||
/* LD variant selected when __mips_isa_rev < 6 and __mips == 64.
   Loads a 64-bit value from 'psrc' through inline asm using the `uld`
   mnemonic (the R6 variant uses plain `ld`); presumably this is so that
   unaligned sources are handled - confirm against the assembler
   documentation. The loaded value is the result of the GNU statement
   expression. */
#define LD(psrc) ({ \
|
||||
const uint8_t *psrc_m = (const uint8_t *)(psrc); \
|
||||
uint64_t val_m = 0; \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
"uld %[val_m], %[psrc_m] \n\t" \
|
||||
\
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_m] "m" (*psrc_m) \
|
||||
); \
|
||||
\
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
/* LD variant selected when __mips_isa_rev < 6 and __mips != 64.
   Builds a 64-bit value from two 32-bit LW loads: the word at 'psrc' becomes
   the low half and the word at 'psrc' + 4 becomes the high half. The result
   is the value of the GNU statement expression.
   The pointer is named psrc_m1 rather than psrc_m because LW declares its
   own 'psrc_m'; reusing that name would make LW's pointer refer to itself. */
#define LD(psrc) ({ \
|
||||
const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
|
||||
uint32_t val0_m, val1_m; \
|
||||
uint64_t val_m = 0; \
|
||||
\
|
||||
val0_m = LW(psrc_m1); \
|
||||
val1_m = LW(psrc_m1 + 4); \
|
||||
\
|
||||
val_m = (uint64_t)(val1_m); \
|
||||
val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
|
||||
val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
|
||||
\
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
|
||||
/* SD variant selected when __mips_isa_rev < 6.
   Stores the 64-bit value 'val' to 'pdst' as two 32-bit SW stores: the low
   32 bits go to 'pdst' and the high 32 bits go to 'pdst' + 4.
   The pointer is named pdst_m1 rather than pdst_m because SW declares its
   own 'pdst_m'.
   NOTE(review): 'val' appears twice in the expansion, so an argument with
   side effects would be evaluated twice. */
#define SD(val, pdst) { \
|
||||
uint8_t *pdst_m1 = (uint8_t *)(pdst); \
|
||||
uint32_t val0_m, val1_m; \
|
||||
\
|
||||
val0_m = (uint32_t)((val) & 0x00000000FFFFFFFF); \
|
||||
val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
|
||||
\
|
||||
SW(val0_m, pdst_m1); \
|
||||
SW(val1_m, pdst_m1 + 4); \
|
||||
}
|
||||
#endif // (__mips_isa_rev >= 6)
|
||||
|
||||
/* Description : Load 4 words with stride
|
||||
@ -69,6 +181,21 @@
|
||||
out3 = LW((psrc) + 3 * stride); \
|
||||
}
|
||||
|
||||
/* Description : Load double words with stride
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1 (LD2); out0, out1, out2, out3 (LD4)
   Details     : LD2 loads a double word in 'out0' from (psrc) and a double
                 word in 'out1' from (psrc + stride).
                 LD4 applies LD2 twice, loading 'out2' and 'out3' from
                 (psrc + 2 * stride) and (psrc + 3 * stride).
                 'stride' is parenthesised wherever it is combined with
                 another operator, so an expression argument such as 'a + b'
                 is scaled as a whole instead of being split by precedence.
*/
#define LD2(psrc, stride, out0, out1) {  \
  out0 = LD((psrc));                     \
  out1 = LD((psrc) + (stride));          \
}
#define LD4(psrc, stride, out0, out1, out2, out3) {  \
  LD2((psrc), stride, out0, out1);                   \
  LD2((psrc) + 2 * (stride), stride, out2, out3);    \
}
|
||||
|
||||
/* Description : Load vectors with 16 byte elements with stride
|
||||
Arguments : Inputs - psrc, stride
|
||||
Outputs - out0, out1
|
||||
@ -81,6 +208,7 @@
|
||||
out1 = LD_B(RTYPE, (psrc) + stride); \
|
||||
}
|
||||
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
|
||||
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
|
||||
|
||||
#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) { \
|
||||
LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
||||
@ -93,6 +221,7 @@
|
||||
LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
|
||||
}
|
||||
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
|
||||
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
|
||||
|
||||
#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) { \
|
||||
LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
|
||||
@ -100,6 +229,14 @@
|
||||
}
|
||||
#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
|
||||
|
||||
/* Load eight vectors of type RTYPE into out0..out7 by applying LD_B4 twice:
   once at (psrc) and once at (psrc + 4 * stride).
   'stride' is parenthesised in the multiplication so that an expression
   argument such as 'a + b' is scaled as a whole. */
#define LD_B8(RTYPE, psrc, stride,                                      \
              out0, out1, out2, out3, out4, out5, out6, out7) {         \
  LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3);                 \
  LD_B4(RTYPE, (psrc) + 4 * (stride), stride, out4, out5, out6, out7);  \
}
/* LD_B8 with RTYPE fixed to v16u8 / v16i8 respectively. */
#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__)
|
||||
|
||||
/* Description : Load vectors with 8 halfword elements with stride
|
||||
Arguments : Inputs - psrc, stride
|
||||
Outputs - out0, out1
|
||||
@ -271,6 +408,13 @@
|
||||
#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__)
|
||||
#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__)
|
||||
|
||||
/* Insert two double words into the vector 'out' using __msa_insert_d:
   'in0' goes into element index 0 and 'in1' into element index 1. 'out' is
   viewed as v2i64 for each insertion and the result is cast back to RTYPE.
   'out' is both read and written, so it must be an assignable vector. */
#define INSERT_D2(RTYPE, in0, in1, out) { \
|
||||
out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
|
||||
out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
|
||||
}
|
||||
/* INSERT_D2 with RTYPE fixed to v16u8 / v16i8 respectively. */
#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__)
|
||||
#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
|
||||
|
||||
/* Description : Interleave both left and right half of input vectors
|
||||
Arguments : Inputs - in0, in1
|
||||
Outputs - out0, out1
|
||||
@ -328,4 +472,53 @@
|
||||
tmp_m = __msa_clti_s_h((v8i16)in, 0); \
|
||||
ILVRL_H2_SW(tmp_m, in, out0, out1); \
|
||||
}
|
||||
|
||||
/* Description : Store 4 double words with stride
   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
   Details     : Store double word from 'in0' to (pdst)
                 Store double word from 'in1' to (pdst + stride)
                 Store double word from 'in2' to (pdst + 2 * stride)
                 Store double word from 'in3' to (pdst + 3 * stride)
                 Every SD statement is terminated with ';' and 'stride' is
                 parenthesised, so an expression argument such as 'a + b' is
                 scaled as a whole instead of being split by precedence.
*/
#define SD4(in0, in1, in2, in3, pdst, stride) {  \
  SD(in0, (pdst));                               \
  SD(in1, (pdst) + (stride));                    \
  SD(in2, (pdst) + 2 * (stride));                \
  SD(in3, (pdst) + 3 * (stride));                \
}
|
||||
|
||||
/* Description : Store vectors of 8 halfword elements with stride
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 8 halfword elements from 'in0' to (pdst)
                 Store 8 halfword elements from 'in1' to (pdst + stride)
                 The offset is added to 'pdst' before ST_H casts the address
                 to RTYPE *, so 'stride' counts elements of pdst's own
                 pointer type. 'stride' is parenthesised so that an
                 expression argument (e.g. a conditional) is added as a whole.
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) {  \
  ST_H(RTYPE, in0, (pdst));                     \
  ST_H(RTYPE, in1, (pdst) + (stride));          \
}
/* ST_H2 with RTYPE fixed to v8i16. */
#define ST_SH2(...) ST_H2(v8i16, __VA_ARGS__)
|
||||
|
||||
/* Description : Store 8x4 byte block to destination memory from input
                 vectors
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Index 0 double word element from 'in0' vector is copied to
                 the GP register and stored to (pdst)
                 Index 1 double word element from 'in0' vector is copied to
                 the GP register and stored to (pdst + stride)
                 Index 0 double word element from 'in1' vector is copied to
                 the GP register and stored to (pdst + 2 * stride)
                 Index 1 double word element from 'in1' vector is copied to
                 the GP register and stored to (pdst + 3 * stride)
                 The four stores are issued through SD4 on a uint8_t pointer,
                 so 'stride' is a byte offset between rows.
*/
#define ST8x4_UB(in0, in1, pdst, stride) { \
|
||||
uint64_t out0_m, out1_m, out2_m, out3_m; \
|
||||
uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \
|
||||
\
|
||||
out0_m = __msa_copy_u_d((v2i64)in0, 0); \
|
||||
out1_m = __msa_copy_u_d((v2i64)in0, 1); \
|
||||
out2_m = __msa_copy_u_d((v2i64)in1, 0); \
|
||||
out3_m = __msa_copy_u_d((v2i64)in1, 1); \
|
||||
\
|
||||
SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \
|
||||
}
|
||||
#endif /* VPX_DSP_MIPS_MACROS_MSA_H_ */
|
||||
|
@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/mips/msa/vp9_macros_msa.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx_dsp/mips/macros_msa.h"
|
||||
|
||||
static void sub_blk_4x4_msa(const uint8_t *src_ptr, int32_t src_stride,
|
||||
const uint8_t *pred_ptr, int32_t pred_stride,
|
||||
@ -226,7 +226,7 @@ static void sub_blk_64x64_msa(const uint8_t *src, int32_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_subtract_block_msa(int32_t rows, int32_t cols,
|
||||
void vpx_subtract_block_msa(int32_t rows, int32_t cols,
|
||||
int16_t *diff_ptr, ptrdiff_t diff_stride,
|
||||
const uint8_t *src_ptr, ptrdiff_t src_stride,
|
||||
const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
|
||||
@ -253,12 +253,12 @@ void vp9_subtract_block_msa(int32_t rows, int32_t cols,
|
||||
diff_ptr, diff_stride);
|
||||
break;
|
||||
default:
|
||||
vp9_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr,
|
||||
vpx_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr,
|
||||
src_stride, pred_ptr, pred_stride);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
vp9_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride,
|
||||
vpx_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride,
|
||||
pred_ptr, pred_stride);
|
||||
}
|
||||
}
|
56
vpx_dsp/subtract.c
Normal file
56
vpx_dsp/subtract.c
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
// Writes the per-sample residual (src - pred) of a rows x cols block of
// 8-bit samples into diff. After each row, every pointer is advanced by its
// own stride, so the three buffers may use different row pitches.
void vpx_subtract_block_c(int rows, int cols,
                          int16_t *diff, ptrdiff_t diff_stride,
                          const uint8_t *src, ptrdiff_t src_stride,
                          const uint8_t *pred, ptrdiff_t pred_stride) {
  int row = 0;

  while (row < rows) {
    int col = 0;

    // One row: the difference is computed in int and stored as int16_t.
    while (col < cols) {
      diff[col] = (int16_t)(src[col] - pred[col]);
      ++col;
    }

    // Step all three buffers to the start of the next row.
    src += src_stride;
    pred += pred_stride;
    diff += diff_stride;
    ++row;
  }
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth residual: src8 and pred8 are converted with
// CONVERT_TO_SHORTPTR and then read as uint16_t samples, so src_stride and
// pred_stride advance in 16-bit elements. Writes (src - pred) for a
// rows x cols block into diff. The bd argument is accepted but not used.
void vpx_highbd_subtract_block_c(int rows, int cols,
                                 int16_t *diff, ptrdiff_t diff_stride,
                                 const uint8_t *src8, ptrdiff_t src_stride,
                                 const uint8_t *pred8, ptrdiff_t pred_stride,
                                 int bd) {
  uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  int row = 0;
  (void) bd;

  while (row < rows) {
    int col = 0;

    while (col < cols) {
      diff[col] = (int16_t)(src_row[col] - pred_row[col]);
      ++col;
    }

    // Step all three buffers to the start of the next row.
    src_row += src_stride;
    pred_row += pred_stride;
    diff += diff_stride;
    ++row;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
|
@ -12,13 +12,16 @@ DSP_SRCS-yes += vpx_dsp.mk
|
||||
|
||||
ifeq ($(CONFIG_ENCODERS),yes)
|
||||
DSP_SRCS-yes += sad.c
|
||||
DSP_SRCS-yes += subtract.c
|
||||
|
||||
DSP_SRCS-$(HAVE_MEDIA) += arm/sad_media$(ASM)
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/subtract_neon.c
|
||||
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/subtract_msa.c
|
||||
|
||||
DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm
|
||||
DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm
|
||||
@ -30,6 +33,7 @@ DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
|
||||
ifeq ($(CONFIG_USE_X86INC),yes)
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/subtract_sse2.asm
|
||||
|
||||
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
|
||||
|
@ -36,6 +36,12 @@ if ($opts{arch} eq "x86_64") {
|
||||
}
|
||||
|
||||
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
|
||||
#
|
||||
# Block subtraction
|
||||
#
|
||||
add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
|
||||
specialize qw/vpx_subtract_block neon msa/, "$sse2_x86inc";
|
||||
|
||||
#
|
||||
# Single block SAD
|
||||
#
|
||||
@ -210,6 +216,12 @@ add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const
|
||||
specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
|
||||
|
||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
#
|
||||
# Block subtraction
|
||||
#
|
||||
add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
|
||||
specialize qw/vpx_highbd_subtract_block/;
|
||||
|
||||
#
|
||||
# Single block SAD
|
||||
#
|
||||
|
@ -7,12 +7,13 @@
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
%define program_name vpx
|
||||
|
||||
%include "third_party/x86inc/x86inc.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
; void vp9_subtract_block(int rows, int cols,
|
||||
; void vpx_subtract_block(int rows, int cols,
|
||||
; int16_t *diff, ptrdiff_t diff_stride,
|
||||
; const uint8_t *src, ptrdiff_t src_stride,
|
||||
; const uint8_t *pred, ptrdiff_t pred_stride)
|
Loading…
x
Reference in New Issue
Block a user