Merge branch 'master' into nextgenv2
This commit is contained in:
@@ -141,7 +141,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
|
||||
&vpx_highbd_tm_predictor_32x32_c, 32, 8),
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
|
||||
@@ -155,14 +155,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 8),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
|
||||
@@ -176,7 +176,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 8),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
|
||||
@@ -194,7 +194,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
|
||||
&vpx_highbd_tm_predictor_32x32_c, 32,
|
||||
10),
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
|
||||
@@ -211,14 +211,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32,
|
||||
10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
|
||||
@@ -233,7 +233,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 10),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
|
||||
@@ -251,7 +251,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
|
||||
&vpx_highbd_tm_predictor_32x32_c, 32,
|
||||
12),
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
|
||||
@@ -268,14 +268,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32,
|
||||
12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
|
||||
@@ -290,7 +290,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||
&vpx_highbd_v_predictor_16x16_c, 16, 12),
|
||||
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
|
||||
&vpx_highbd_v_predictor_32x32_c, 32, 12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
|
||||
make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
|
||||
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
|
||||
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
|
||||
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
|
||||
|
@@ -190,7 +190,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
|
||||
BLOCK_SIZE bsize,
|
||||
int64_t rate,
|
||||
int64_t dist,
|
||||
int skip) {
|
||||
int skip,
|
||||
struct macroblock_plane *const p) {
|
||||
const VP9_COMMON *const cm = &cpi->common;
|
||||
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
|
||||
const int bw = num_8x8_blocks_wide_lookup[bsize];
|
||||
@@ -198,12 +199,33 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
|
||||
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
|
||||
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
|
||||
const int block_index = mi_row * cm->mi_cols + mi_col;
|
||||
const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
|
||||
bsize);
|
||||
int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
|
||||
// Default is to not update the refresh map.
|
||||
int new_map_value = cr->map[block_index];
|
||||
int x = 0; int y = 0;
|
||||
|
||||
int is_skin = 0;
|
||||
if (refresh_this_block == 0 &&
|
||||
bsize <= BLOCK_16X16 &&
|
||||
cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
|
||||
// Take center pixel in block to determine is_skin.
|
||||
const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
|
||||
const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
|
||||
const int uv_width_shift = y_width_shift >> 1;
|
||||
const int uv_height_shift = y_height_shift >> 1;
|
||||
const int stride = p[0].src.stride;
|
||||
const int strideuv = p[1].src.stride;
|
||||
const uint8_t ysource =
|
||||
p[0].src.buf[y_height_shift * stride + y_width_shift];
|
||||
const uint8_t usource =
|
||||
p[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
|
||||
const uint8_t vsource =
|
||||
p[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
|
||||
is_skin = vp9_skin_pixel(ysource, usource, vsource);
|
||||
if (is_skin)
|
||||
refresh_this_block = 1;
|
||||
}
|
||||
|
||||
// If this block is labeled for refresh, check if we should reset the
|
||||
// segment_id.
|
||||
if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
|
||||
|
@@ -14,6 +14,8 @@
|
||||
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vp9/common/vp9_blockd.h"
|
||||
#include "vp9/encoder/vp9_block.h"
|
||||
#include "vp9/encoder/vp9_skin_detection.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -93,7 +95,8 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i,
|
||||
void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
|
||||
MB_MODE_INFO *const mbmi,
|
||||
int mi_row, int mi_col, BLOCK_SIZE bsize,
|
||||
int64_t rate, int64_t dist, int skip);
|
||||
int64_t rate, int64_t dist, int skip,
|
||||
struct macroblock_plane *const p);
|
||||
|
||||
void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
|
||||
const MB_MODE_INFO *const mbmi,
|
||||
|
@@ -1045,7 +1045,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
|
||||
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
|
||||
vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
|
||||
mi_col, bsize, ctx->rate, ctx->dist,
|
||||
x->skip);
|
||||
x->skip, p);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1705,6 +1705,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
MODE_INFO *const mi = xd->mi[0];
|
||||
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
|
||||
struct macroblock_plane *const p = x->plane;
|
||||
const struct segmentation *const seg = &cm->seg;
|
||||
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
|
||||
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
|
||||
@@ -1725,7 +1726,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
|
||||
} else {
|
||||
// Setting segmentation map for cyclic_refresh.
|
||||
vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize,
|
||||
ctx->rate, ctx->dist, x->skip);
|
||||
ctx->rate, ctx->dist, x->skip, p);
|
||||
}
|
||||
vp9_init_plane_quantizers(cpi, x);
|
||||
}
|
||||
|
@@ -1349,11 +1349,25 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
|
||||
|
||||
for (ref = 0; ref < 1 + is_compound; ++ref) {
|
||||
const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
|
||||
pd->pre[ref].stride)];
|
||||
const int bw = b_width_log2_lookup[BLOCK_8X8];
|
||||
const int h = 4 * (i >> bw);
|
||||
const int w = 4 * (i & ((1 << bw) - 1));
|
||||
const struct scale_factors *sf = &xd->block_refs[ref]->sf;
|
||||
int y_stride = pd->pre[ref].stride;
|
||||
uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
|
||||
|
||||
if (vp9_is_scaled(sf)) {
|
||||
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
|
||||
const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
|
||||
|
||||
y_stride = xd->block_refs[ref]->buf->y_stride;
|
||||
pre = xd->block_refs[ref]->buf->y_buffer;
|
||||
pre += scaled_buffer_offset(x_start + w, y_start + h,
|
||||
y_stride, sf);
|
||||
}
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
|
||||
vp9_highbd_build_inter_predictor(pre, y_stride,
|
||||
dst, pd->dst.stride,
|
||||
&mi->bmi[i].as_mv[ref].as_mv,
|
||||
&xd->block_refs[ref]->sf, width, height,
|
||||
@@ -1361,7 +1375,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
mi_col * MI_SIZE + 4 * (i % 2),
|
||||
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
|
||||
} else {
|
||||
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
|
||||
vp9_build_inter_predictor(pre, y_stride,
|
||||
dst, pd->dst.stride,
|
||||
&mi->bmi[i].as_mv[ref].as_mv,
|
||||
&xd->block_refs[ref]->sf, width, height, ref,
|
||||
@@ -1370,7 +1384,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
|
||||
mi_row * MI_SIZE + 4 * (i / 2));
|
||||
}
|
||||
#else
|
||||
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
|
||||
vp9_build_inter_predictor(pre, y_stride,
|
||||
dst, pd->dst.stride,
|
||||
&mi->bmi[i].as_mv[ref].as_mv,
|
||||
&xd->block_refs[ref]->sf, width, height, ref,
|
||||
|
@@ -291,10 +291,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";
|
||||
|
||||
add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
|
||||
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
|
||||
specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
|
||||
|
||||
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
|
||||
|
@@ -17,24 +17,20 @@ pw_16: times 4 dd 16
|
||||
pw_32: times 4 dd 32
|
||||
|
||||
SECTION .text
|
||||
INIT_MMX sse
|
||||
INIT_XMM sse2
|
||||
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
|
||||
GET_GOT goffsetq
|
||||
|
||||
movq m0, [aboveq]
|
||||
movq m2, [leftq]
|
||||
DEFINE_ARGS dst, stride, one
|
||||
mov oned, 0x0001
|
||||
pxor m1, m1
|
||||
movd m3, oned
|
||||
pshufw m3, m3, 0x0
|
||||
paddw m0, m2
|
||||
pmaddwd m0, m3
|
||||
packssdw m0, m1
|
||||
pmaddwd m0, m3
|
||||
pshuflw m1, m0, 0xe
|
||||
paddw m0, m1
|
||||
pshuflw m1, m0, 0x1
|
||||
paddw m0, m1
|
||||
paddw m0, [GLOBAL(pw_4)]
|
||||
psraw m0, 3
|
||||
pshufw m0, m0, 0x0
|
||||
pshuflw m0, m0, 0x0
|
||||
movq [dstq ], m0
|
||||
movq [dstq+strideq*2], m0
|
||||
lea dstq, [dstq+strideq*4]
|
||||
@@ -261,43 +257,44 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
|
||||
jnz .loop
|
||||
REP_RET
|
||||
|
||||
INIT_MMX sse
|
||||
cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
|
||||
INIT_XMM sse2
|
||||
cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps
|
||||
movd m1, [aboveq-2]
|
||||
movq m0, [aboveq]
|
||||
pshufw m1, m1, 0x0
|
||||
pshuflw m1, m1, 0x0
|
||||
movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4
|
||||
movlhps m1, m1 ; tl tl tl tl tl tl tl tl
|
||||
; Get the values to compute the maximum value at this bit depth
|
||||
mov oned, 1
|
||||
movd m3, oned
|
||||
pcmpeqw m3, m3
|
||||
movd m4, bpsd
|
||||
pshufw m3, m3, 0x0
|
||||
DEFINE_ARGS dst, stride, line, left
|
||||
mov lineq, -2
|
||||
mova m2, m3
|
||||
psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl
|
||||
psllw m3, m4
|
||||
add leftq, 8
|
||||
psubw m3, m2 ; max possible value
|
||||
pxor m4, m4 ; min possible value
|
||||
psubw m0, m1
|
||||
.loop:
|
||||
movq m1, [leftq+lineq*4]
|
||||
movq m2, [leftq+lineq*4+2]
|
||||
pshufw m1, m1, 0x0
|
||||
pshufw m2, m2, 0x0
|
||||
paddw m1, m0
|
||||
pcmpeqw m2, m2
|
||||
pxor m4, m4 ; min possible value
|
||||
pxor m3, m2 ; max possible value
|
||||
mova m1, [leftq]
|
||||
pshuflw m2, m1, 0x0
|
||||
pshuflw m5, m1, 0x55
|
||||
movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2
|
||||
paddw m2, m0
|
||||
;Clamp to the bit-depth
|
||||
pminsw m1, m3
|
||||
pminsw m2, m3
|
||||
pmaxsw m1, m4
|
||||
pmaxsw m2, m4
|
||||
;Store the values
|
||||
movq [dstq ], m1
|
||||
movq [dstq+strideq*2], m2
|
||||
movq [dstq ], m2
|
||||
movhpd [dstq+strideq*2], m2
|
||||
lea dstq, [dstq+strideq*4]
|
||||
inc lineq
|
||||
jnz .loop
|
||||
REP_RET
|
||||
pshuflw m2, m1, 0xaa
|
||||
pshuflw m5, m1, 0xff
|
||||
movlhps m2, m5
|
||||
paddw m2, m0
|
||||
;Clamp to the bit-depth
|
||||
pminsw m2, m3
|
||||
pmaxsw m2, m4
|
||||
;Store the values
|
||||
movq [dstq ], m2
|
||||
movhpd [dstq+strideq*2], m2
|
||||
RET
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
|
||||
|
Reference in New Issue
Block a user