Implement SSE2 block_error.
Change vp9_block_error() to return a 64-bit error value, and change all callers to expect a 64-bit return value (this prevents overflows, which we basically don't check for at all right now). Remove the duplicate block_error() function, which worked around the overflow by clamping its result to INT_MAX. Remove the old (incompatible) MMX/SSE2 block_error SIMD versions and replace them with a new SSE2 version that returns a 64-bit value. Encoding time for the first 50 frames of bus @ 1500kbps goes from 3min29 to 3min23, i.e. a 3% overall speedup. Change-Id: Ib71ac5508b5ee8a80f1753cd85d72df1629abe68
This commit is contained in:
parent
7756e9892b
commit
54b2a59623
@ -529,9 +529,8 @@ prototype unsigned int vp9_get_mb_ss "const int16_t *"
|
|||||||
specialize vp9_get_mb_ss mmx sse2
|
specialize vp9_get_mb_ss mmx sse2
|
||||||
# ENCODEMB INVOKE
|
# ENCODEMB INVOKE
|
||||||
|
|
||||||
prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size"
|
prototype int64_t vp9_block_error "int16_t *coeff, int16_t *dqcoeff, intptr_t block_size"
|
||||||
specialize vp9_block_error mmx sse2
|
specialize vp9_block_error sse2
|
||||||
vp9_block_error_sse2=vp9_block_error_xmm
|
|
||||||
|
|
||||||
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
|
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
|
||||||
specialize vp9_subtract_block sse2
|
specialize vp9_subtract_block sse2
|
||||||
|
@ -582,7 +582,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
|
static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
|
||||||
TOKENEXTRA **tp, int *totalrate, int *totaldist,
|
TOKENEXTRA **tp, int *totalrate, int64_t *totaldist,
|
||||||
BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
|
BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
|
||||||
VP9_COMMON * const cm = &cpi->common;
|
VP9_COMMON * const cm = &cpi->common;
|
||||||
MACROBLOCK * const x = &cpi->mb;
|
MACROBLOCK * const x = &cpi->mb;
|
||||||
@ -1195,7 +1195,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
|
|||||||
}
|
}
|
||||||
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
||||||
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
|
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
|
||||||
int *rate, int *dist) {
|
int *rate, int64_t *dist) {
|
||||||
VP9_COMMON * const cm = &cpi->common;
|
VP9_COMMON * const cm = &cpi->common;
|
||||||
MACROBLOCK * const x = &cpi->mb;
|
MACROBLOCK * const x = &cpi->mb;
|
||||||
MACROBLOCKD *xd = &cpi->mb.e_mbd;
|
MACROBLOCKD *xd = &cpi->mb.e_mbd;
|
||||||
@ -1211,7 +1211,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
|||||||
BLOCK_SIZE_TYPE subsize;
|
BLOCK_SIZE_TYPE subsize;
|
||||||
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
|
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
|
||||||
PARTITION_CONTEXT sl[8], sa[8];
|
PARTITION_CONTEXT sl[8], sa[8];
|
||||||
int r = 0, d = 0;
|
int r = 0;
|
||||||
|
int64_t d = 0;
|
||||||
|
|
||||||
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
|
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
|
||||||
return;
|
return;
|
||||||
@ -1252,7 +1253,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
|||||||
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
|
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
|
||||||
get_block_context(x, subsize));
|
get_block_context(x, subsize));
|
||||||
if (mi_row + (bh >> 1) <= cm->mi_rows) {
|
if (mi_row + (bh >> 1) <= cm->mi_rows) {
|
||||||
int rt, dt;
|
int rt;
|
||||||
|
int64_t dt;
|
||||||
update_state(cpi, get_block_context(x, subsize), subsize, 0);
|
update_state(cpi, get_block_context(x, subsize), subsize, 0);
|
||||||
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
|
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
|
||||||
*(get_sb_index(xd, subsize)) = 1;
|
*(get_sb_index(xd, subsize)) = 1;
|
||||||
@ -1270,7 +1272,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
|||||||
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
|
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
|
||||||
get_block_context(x, subsize));
|
get_block_context(x, subsize));
|
||||||
if (mi_col + (bs >> 1) <= cm->mi_cols) {
|
if (mi_col + (bs >> 1) <= cm->mi_cols) {
|
||||||
int rt, dt;
|
int rt;
|
||||||
|
int64_t dt;
|
||||||
update_state(cpi, get_block_context(x, subsize), subsize, 0);
|
update_state(cpi, get_block_context(x, subsize), subsize, 0);
|
||||||
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
|
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
|
||||||
*(get_sb_index(xd, subsize)) = 1;
|
*(get_sb_index(xd, subsize)) = 1;
|
||||||
@ -1289,7 +1292,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
|||||||
int x_idx = (i & 1) * (bs >> 2);
|
int x_idx = (i & 1) * (bs >> 2);
|
||||||
int y_idx = (i >> 1) * (bs >> 2);
|
int y_idx = (i >> 1) * (bs >> 2);
|
||||||
int jj = i >> 1, ii = i & 0x01;
|
int jj = i >> 1, ii = i & 0x01;
|
||||||
int rt, dt;
|
int rt;
|
||||||
|
int64_t dt;
|
||||||
|
|
||||||
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
|
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
|
||||||
continue;
|
continue;
|
||||||
@ -1323,7 +1327,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
|
|||||||
// results, for encoding speed-up.
|
// results, for encoding speed-up.
|
||||||
static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
||||||
int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
|
int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
|
||||||
int *dist) {
|
int64_t *dist) {
|
||||||
VP9_COMMON * const cm = &cpi->common;
|
VP9_COMMON * const cm = &cpi->common;
|
||||||
MACROBLOCK * const x = &cpi->mb;
|
MACROBLOCK * const x = &cpi->mb;
|
||||||
MACROBLOCKD * const xd = &x->e_mbd;
|
MACROBLOCKD * const xd = &x->e_mbd;
|
||||||
@ -1334,7 +1338,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
|||||||
TOKENEXTRA *tp_orig = *tp;
|
TOKENEXTRA *tp_orig = *tp;
|
||||||
int i, pl;
|
int i, pl;
|
||||||
BLOCK_SIZE_TYPE subsize;
|
BLOCK_SIZE_TYPE subsize;
|
||||||
int srate = INT_MAX, sdist = INT_MAX;
|
int srate = INT_MAX;
|
||||||
|
int64_t sdist = INT_MAX;
|
||||||
|
|
||||||
if (bsize < BLOCK_SIZE_SB8X8)
|
if (bsize < BLOCK_SIZE_SB8X8)
|
||||||
if (xd->ab_index != 0) {
|
if (xd->ab_index != 0) {
|
||||||
@ -1351,14 +1356,16 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
|||||||
|| (cpi->sf.use_partitions_greater_than
|
|| (cpi->sf.use_partitions_greater_than
|
||||||
&& bsize > cpi->sf.greater_than_block_size)) {
|
&& bsize > cpi->sf.greater_than_block_size)) {
|
||||||
if (bsize >= BLOCK_SIZE_SB8X8) {
|
if (bsize >= BLOCK_SIZE_SB8X8) {
|
||||||
int r4 = 0, d4 = 0;
|
int r4 = 0;
|
||||||
|
int64_t d4 = 0;
|
||||||
subsize = get_subsize(bsize, PARTITION_SPLIT);
|
subsize = get_subsize(bsize, PARTITION_SPLIT);
|
||||||
*(get_sb_partitioning(x, bsize)) = subsize;
|
*(get_sb_partitioning(x, bsize)) = subsize;
|
||||||
|
|
||||||
for (i = 0; i < 4; ++i) {
|
for (i = 0; i < 4; ++i) {
|
||||||
int x_idx = (i & 1) * (ms >> 1);
|
int x_idx = (i & 1) * (ms >> 1);
|
||||||
int y_idx = (i >> 1) * (ms >> 1);
|
int y_idx = (i >> 1) * (ms >> 1);
|
||||||
int r = 0, d = 0;
|
int r = 0;
|
||||||
|
int64_t d = 0;
|
||||||
|
|
||||||
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
|
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
|
||||||
continue;
|
continue;
|
||||||
@ -1386,8 +1393,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
|||||||
&& bsize <= cpi->sf.less_than_block_size)) {
|
&& bsize <= cpi->sf.less_than_block_size)) {
|
||||||
// PARTITION_HORZ
|
// PARTITION_HORZ
|
||||||
if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
|
if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
|
||||||
int r2, d2;
|
int r2, r = 0;
|
||||||
int r = 0, d = 0;
|
int64_t d2, d = 0;
|
||||||
subsize = get_subsize(bsize, PARTITION_HORZ);
|
subsize = get_subsize(bsize, PARTITION_HORZ);
|
||||||
*(get_sb_index(xd, subsize)) = 0;
|
*(get_sb_index(xd, subsize)) = 0;
|
||||||
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
|
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
|
||||||
@ -1418,13 +1425,15 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
|||||||
|
|
||||||
// PARTITION_VERT
|
// PARTITION_VERT
|
||||||
if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
|
if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
|
||||||
int r2, d2;
|
int r2;
|
||||||
|
int64_t d2;
|
||||||
subsize = get_subsize(bsize, PARTITION_VERT);
|
subsize = get_subsize(bsize, PARTITION_VERT);
|
||||||
*(get_sb_index(xd, subsize)) = 0;
|
*(get_sb_index(xd, subsize)) = 0;
|
||||||
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
|
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
|
||||||
get_block_context(x, subsize));
|
get_block_context(x, subsize));
|
||||||
if (mi_col + (ms >> 1) < cm->mi_cols) {
|
if (mi_col + (ms >> 1) < cm->mi_cols) {
|
||||||
int r = 0, d = 0;
|
int r = 0;
|
||||||
|
int64_t d = 0;
|
||||||
update_state(cpi, get_block_context(x, subsize), subsize, 0);
|
update_state(cpi, get_block_context(x, subsize), subsize, 0);
|
||||||
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
|
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
|
||||||
|
|
||||||
@ -1450,7 +1459,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|
|||||||
// PARTITION_NONE
|
// PARTITION_NONE
|
||||||
if ((mi_row + (ms >> 1) < cm->mi_rows) &&
|
if ((mi_row + (ms >> 1) < cm->mi_rows) &&
|
||||||
(mi_col + (ms >> 1) < cm->mi_cols)) {
|
(mi_col + (ms >> 1) < cm->mi_cols)) {
|
||||||
int r, d;
|
int r;
|
||||||
|
int64_t d;
|
||||||
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
|
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
|
||||||
get_block_context(x, bsize));
|
get_block_context(x, bsize));
|
||||||
if (bsize >= BLOCK_SIZE_SB8X8) {
|
if (bsize >= BLOCK_SIZE_SB8X8) {
|
||||||
@ -1497,7 +1507,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
|
|||||||
// Code each SB in the row
|
// Code each SB in the row
|
||||||
for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
|
for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
|
||||||
mi_col += 64 / MI_SIZE) {
|
mi_col += 64 / MI_SIZE) {
|
||||||
int dummy_rate, dummy_dist;
|
int dummy_rate;
|
||||||
|
int64_t dummy_dist;
|
||||||
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
|
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
|
||||||
cpi->sf.use_one_partition_size_always ) {
|
cpi->sf.use_one_partition_size_always ) {
|
||||||
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
|
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
|
||||||
|
@ -274,12 +274,14 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
|
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
|
||||||
int i, error = 0;
|
intptr_t block_size) {
|
||||||
|
int i;
|
||||||
|
int64_t error = 0;
|
||||||
|
|
||||||
for (i = 0; i < block_size; i++) {
|
for (i = 0; i < block_size; i++) {
|
||||||
int this_diff = coeff[i] - dqcoeff[i];
|
int this_diff = coeff[i] - dqcoeff[i];
|
||||||
error += this_diff * this_diff;
|
error += (unsigned)this_diff * this_diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
@ -417,7 +419,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
|
|||||||
|
|
||||||
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int (*r)[2], int *rate,
|
int (*r)[2], int *rate,
|
||||||
int *d, int *distortion,
|
int64_t *d, int64_t *distortion,
|
||||||
int *s, int *skip,
|
int *s, int *skip,
|
||||||
int64_t txfm_cache[NB_TXFM_MODES],
|
int64_t txfm_cache[NB_TXFM_MODES],
|
||||||
TX_SIZE max_txfm_size) {
|
TX_SIZE max_txfm_size) {
|
||||||
@ -496,27 +498,15 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
rd[TX_4X4][1] : rd[TX_8X8][1];
|
rd[TX_4X4][1] : rd[TX_8X8][1];
|
||||||
}
|
}
|
||||||
|
|
||||||
static int block_error(int16_t *coeff, int16_t *dqcoeff,
|
static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
|
||||||
int block_size, int shift) {
|
int shift) {
|
||||||
int i;
|
|
||||||
int64_t error = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < block_size; i++) {
|
|
||||||
int this_diff = coeff[i] - dqcoeff[i];
|
|
||||||
error += (unsigned)this_diff * this_diff;
|
|
||||||
}
|
|
||||||
error >>= shift;
|
|
||||||
|
|
||||||
return error > INT_MAX ? INT_MAX : (int)error;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
|
|
||||||
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
|
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
|
||||||
return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
|
return vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
|
||||||
16 << (bwl + bhl), shift);
|
16 << (bwl + bhl)) >> shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
|
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
|
||||||
|
int shift) {
|
||||||
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
|
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
|
||||||
int64_t sum = 0;
|
int64_t sum = 0;
|
||||||
int plane;
|
int plane;
|
||||||
@ -524,11 +514,10 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
|
|||||||
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
||||||
const int subsampling = x->e_mbd.plane[plane].subsampling_x +
|
const int subsampling = x->e_mbd.plane[plane].subsampling_x +
|
||||||
x->e_mbd.plane[plane].subsampling_y;
|
x->e_mbd.plane[plane].subsampling_y;
|
||||||
sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
|
sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
|
||||||
16 << (bwl + bhl - subsampling), 0);
|
16 << (bwl + bhl - subsampling));
|
||||||
}
|
}
|
||||||
sum >>= shift;
|
return sum >> shift;
|
||||||
return sum > INT_MAX ? INT_MAX : (int)sum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct rdcost_block_args {
|
struct rdcost_block_args {
|
||||||
@ -586,7 +575,8 @@ static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
|
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
|
||||||
int *rate, int *distortion, int *skippable,
|
int *rate, int64_t *distortion,
|
||||||
|
int *skippable,
|
||||||
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
|
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
|
||||||
MACROBLOCKD *const xd = &x->e_mbd;
|
MACROBLOCKD *const xd = &x->e_mbd;
|
||||||
xd->mode_info_context->mbmi.txfm_size = tx_size;
|
xd->mode_info_context->mbmi.txfm_size = tx_size;
|
||||||
@ -602,11 +592,12 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void super_block_yrd(VP9_COMP *cpi,
|
static void super_block_yrd(VP9_COMP *cpi,
|
||||||
MACROBLOCK *x, int *rate, int *distortion,
|
MACROBLOCK *x, int *rate, int64_t *distortion,
|
||||||
int *skip, BLOCK_SIZE_TYPE bs,
|
int *skip, BLOCK_SIZE_TYPE bs,
|
||||||
int64_t txfm_cache[NB_TXFM_MODES]) {
|
int64_t txfm_cache[NB_TXFM_MODES]) {
|
||||||
VP9_COMMON *const cm = &cpi->common;
|
VP9_COMMON *const cm = &cpi->common;
|
||||||
int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
|
int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
|
||||||
|
int64_t d[TX_SIZE_MAX_SB];
|
||||||
MACROBLOCKD *xd = &x->e_mbd;
|
MACROBLOCKD *xd = &x->e_mbd;
|
||||||
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
|
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
|
||||||
|
|
||||||
@ -651,13 +642,13 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||||||
int *bmode_costs,
|
int *bmode_costs,
|
||||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
|
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
|
||||||
int *bestrate, int *bestratey,
|
int *bestrate, int *bestratey,
|
||||||
int *bestdistortion,
|
int64_t *bestdistortion,
|
||||||
BLOCK_SIZE_TYPE bsize) {
|
BLOCK_SIZE_TYPE bsize) {
|
||||||
MB_PREDICTION_MODE mode;
|
MB_PREDICTION_MODE mode;
|
||||||
MACROBLOCKD *xd = &x->e_mbd;
|
MACROBLOCKD *xd = &x->e_mbd;
|
||||||
int64_t best_rd = INT64_MAX;
|
int64_t best_rd = INT64_MAX;
|
||||||
int rate = 0;
|
int rate = 0;
|
||||||
int distortion;
|
int64_t distortion;
|
||||||
VP9_COMMON *const cm = &cpi->common;
|
VP9_COMMON *const cm = &cpi->common;
|
||||||
const int src_stride = x->plane[0].src.stride;
|
const int src_stride = x->plane[0].src.stride;
|
||||||
uint8_t *src, *dst;
|
uint8_t *src, *dst;
|
||||||
@ -777,7 +768,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
|
|||||||
|
|
||||||
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
|
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
|
||||||
int *Rate, int *rate_y,
|
int *Rate, int *rate_y,
|
||||||
int *Distortion, int64_t best_rd) {
|
int64_t *Distortion, int64_t best_rd) {
|
||||||
int i, j;
|
int i, j;
|
||||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||||
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
|
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
|
||||||
@ -785,7 +776,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
|
|||||||
int bh = 1 << b_height_log2(bsize);
|
int bh = 1 << b_height_log2(bsize);
|
||||||
int idx, idy;
|
int idx, idy;
|
||||||
int cost = 0;
|
int cost = 0;
|
||||||
int distortion = 0;
|
int64_t distortion = 0;
|
||||||
int tot_rate_y = 0;
|
int tot_rate_y = 0;
|
||||||
int64_t total_rd = 0;
|
int64_t total_rd = 0;
|
||||||
ENTROPY_CONTEXT t_above[4], t_left[4];
|
ENTROPY_CONTEXT t_above[4], t_left[4];
|
||||||
@ -802,7 +793,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
|
|||||||
const int mis = xd->mode_info_stride;
|
const int mis = xd->mode_info_stride;
|
||||||
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
|
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
|
||||||
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
|
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
|
||||||
int UNINITIALIZED_IS_SAFE(d);
|
int64_t UNINITIALIZED_IS_SAFE(d);
|
||||||
i = idy * 2 + idx;
|
i = idy * 2 + idx;
|
||||||
|
|
||||||
if (xd->frame_type == KEY_FRAME) {
|
if (xd->frame_type == KEY_FRAME) {
|
||||||
@ -844,14 +835,14 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
|
|||||||
|
|
||||||
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int *rate, int *rate_tokenonly,
|
int *rate, int *rate_tokenonly,
|
||||||
int *distortion, int *skippable,
|
int64_t *distortion, int *skippable,
|
||||||
BLOCK_SIZE_TYPE bsize,
|
BLOCK_SIZE_TYPE bsize,
|
||||||
int64_t txfm_cache[NB_TXFM_MODES]) {
|
int64_t txfm_cache[NB_TXFM_MODES]) {
|
||||||
MB_PREDICTION_MODE mode;
|
MB_PREDICTION_MODE mode;
|
||||||
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
|
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
|
||||||
MACROBLOCKD *const xd = &x->e_mbd;
|
MACROBLOCKD *const xd = &x->e_mbd;
|
||||||
int this_rate, this_rate_tokenonly;
|
int this_rate, this_rate_tokenonly, s;
|
||||||
int this_distortion, s;
|
int64_t this_distortion;
|
||||||
int64_t best_rd = INT64_MAX, this_rd;
|
int64_t best_rd = INT64_MAX, this_rd;
|
||||||
TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
|
TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
|
||||||
int i;
|
int i;
|
||||||
@ -912,7 +903,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
|
static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
|
||||||
int *rate, int *distortion,
|
int *rate, int64_t *distortion,
|
||||||
int *skippable, BLOCK_SIZE_TYPE bsize,
|
int *skippable, BLOCK_SIZE_TYPE bsize,
|
||||||
TX_SIZE uv_tx_size) {
|
TX_SIZE uv_tx_size) {
|
||||||
MACROBLOCKD *const xd = &x->e_mbd;
|
MACROBLOCKD *const xd = &x->e_mbd;
|
||||||
@ -927,7 +918,7 @@ static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
|
static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
|
||||||
int *rate, int *distortion, int *skippable,
|
int *rate, int64_t *distortion, int *skippable,
|
||||||
BLOCK_SIZE_TYPE bsize) {
|
BLOCK_SIZE_TYPE bsize) {
|
||||||
MACROBLOCKD *const xd = &x->e_mbd;
|
MACROBLOCKD *const xd = &x->e_mbd;
|
||||||
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
|
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
|
||||||
@ -952,13 +943,13 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
|
|||||||
|
|
||||||
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int *rate, int *rate_tokenonly,
|
int *rate, int *rate_tokenonly,
|
||||||
int *distortion, int *skippable,
|
int64_t *distortion, int *skippable,
|
||||||
BLOCK_SIZE_TYPE bsize) {
|
BLOCK_SIZE_TYPE bsize) {
|
||||||
MB_PREDICTION_MODE mode;
|
MB_PREDICTION_MODE mode;
|
||||||
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
|
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
|
||||||
int64_t best_rd = INT64_MAX, this_rd;
|
int64_t best_rd = INT64_MAX, this_rd;
|
||||||
int this_rate_tokenonly, this_rate;
|
int this_rate_tokenonly, this_rate, s;
|
||||||
int this_distortion, s;
|
int64_t this_distortion;
|
||||||
|
|
||||||
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
|
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
|
||||||
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
|
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
|
||||||
@ -1101,7 +1092,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
|
|||||||
MACROBLOCK *x,
|
MACROBLOCK *x,
|
||||||
int i,
|
int i,
|
||||||
int *labelyrate,
|
int *labelyrate,
|
||||||
int *distortion,
|
int64_t *distortion,
|
||||||
ENTROPY_CONTEXT *ta,
|
ENTROPY_CONTEXT *ta,
|
||||||
ENTROPY_CONTEXT *tl) {
|
ENTROPY_CONTEXT *tl) {
|
||||||
int k;
|
int k;
|
||||||
@ -1126,7 +1117,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
|
|||||||
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
|
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
|
||||||
xd->plane[0].dst.buf,
|
xd->plane[0].dst.buf,
|
||||||
xd->plane[0].dst.stride);
|
xd->plane[0].dst.stride);
|
||||||
int thisdistortion = 0;
|
int64_t thisdistortion = 0;
|
||||||
int thisrate = 0;
|
int thisrate = 0;
|
||||||
|
|
||||||
*labelyrate = 0;
|
*labelyrate = 0;
|
||||||
@ -1189,7 +1180,7 @@ typedef struct {
|
|||||||
|
|
||||||
int64_t segment_rd;
|
int64_t segment_rd;
|
||||||
int r;
|
int r;
|
||||||
int d;
|
int64_t d;
|
||||||
int segment_yrate;
|
int segment_yrate;
|
||||||
MB_PREDICTION_MODE modes[4];
|
MB_PREDICTION_MODE modes[4];
|
||||||
int_mv mvs[4], second_mvs[4];
|
int_mv mvs[4], second_mvs[4];
|
||||||
@ -1281,21 +1272,18 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
BEST_SEG_INFO *bsi,
|
BEST_SEG_INFO *bsi,
|
||||||
int_mv seg_mvs[4][MAX_REF_FRAMES],
|
int_mv seg_mvs[4][MAX_REF_FRAMES],
|
||||||
int mi_row, int mi_col) {
|
int mi_row, int mi_col) {
|
||||||
int i, j;
|
int i, j, br = 0, rate = 0, sbr = 0, idx, idy;
|
||||||
int br = 0, bd = 0;
|
int64_t bd = 0, sbd = 0;
|
||||||
MB_PREDICTION_MODE this_mode;
|
MB_PREDICTION_MODE this_mode;
|
||||||
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
|
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
|
||||||
const int label_count = 4;
|
const int label_count = 4;
|
||||||
int64_t this_segment_rd = 0, other_segment_rd;
|
int64_t this_segment_rd = 0, other_segment_rd;
|
||||||
int label_mv_thresh;
|
int label_mv_thresh;
|
||||||
int rate = 0;
|
|
||||||
int sbr = 0, sbd = 0;
|
|
||||||
int segmentyrate = 0;
|
int segmentyrate = 0;
|
||||||
int best_eobs[4] = { 0 };
|
int best_eobs[4] = { 0 };
|
||||||
BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
|
BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
|
||||||
int bwl = b_width_log2(bsize), bw = 1 << bwl;
|
int bwl = b_width_log2(bsize), bw = 1 << bwl;
|
||||||
int bhl = b_height_log2(bsize), bh = 1 << bhl;
|
int bhl = b_height_log2(bsize), bh = 1 << bhl;
|
||||||
int idx, idy;
|
|
||||||
vp9_variance_fn_ptr_t *v_fn_ptr;
|
vp9_variance_fn_ptr_t *v_fn_ptr;
|
||||||
ENTROPY_CONTEXT t_above[4], t_left[4];
|
ENTROPY_CONTEXT t_above[4], t_left[4];
|
||||||
ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
|
ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
|
||||||
@ -1340,7 +1328,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
// search for the best motion vector on this segment
|
// search for the best motion vector on this segment
|
||||||
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
|
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
|
||||||
int64_t this_rd;
|
int64_t this_rd;
|
||||||
int distortion;
|
int64_t distortion;
|
||||||
int labelyrate;
|
int labelyrate;
|
||||||
ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
|
ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
|
||||||
const struct buf_2d orig_src = x->plane[0].src;
|
const struct buf_2d orig_src = x->plane[0].src;
|
||||||
@ -1527,7 +1515,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
int64_t best_rd,
|
int64_t best_rd,
|
||||||
int *returntotrate,
|
int *returntotrate,
|
||||||
int *returnyrate,
|
int *returnyrate,
|
||||||
int *returndistortion,
|
int64_t *returndistortion,
|
||||||
int *skippable, int mvthresh,
|
int *skippable, int mvthresh,
|
||||||
int_mv seg_mvs[4][MAX_REF_FRAMES],
|
int_mv seg_mvs[4][MAX_REF_FRAMES],
|
||||||
int mi_row, int mi_col) {
|
int mi_row, int mi_col) {
|
||||||
@ -1921,7 +1909,7 @@ static double model_dist_norm(double x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void model_rd_from_var_lapndz(int var, int n, int qstep,
|
static void model_rd_from_var_lapndz(int var, int n, int qstep,
|
||||||
int *rate, int *dist) {
|
int *rate, int64_t *dist) {
|
||||||
// This function models the rate and distortion for a Laplacian
|
// This function models the rate and distortion for a Laplacian
|
||||||
// source with given variance when quantized with a uniform quantizer
|
// source with given variance when quantized with a uniform quantizer
|
||||||
// with given stepsize. The closed form expression is:
|
// with given stepsize. The closed form expression is:
|
||||||
@ -1958,12 +1946,13 @@ static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
|
|||||||
|
|
||||||
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
|
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
|
||||||
MACROBLOCK *x, MACROBLOCKD *xd,
|
MACROBLOCK *x, MACROBLOCKD *xd,
|
||||||
int *out_rate_sum, int *out_dist_sum) {
|
int *out_rate_sum, int64_t *out_dist_sum) {
|
||||||
// Note our transform coeffs are 8 times an orthogonal transform.
|
// Note our transform coeffs are 8 times an orthogonal transform.
|
||||||
// Hence quantizer step is also 8 times. To get effective quantizer
|
// Hence quantizer step is also 8 times. To get effective quantizer
|
||||||
// we need to divide by 8 before sending to modeling function.
|
// we need to divide by 8 before sending to modeling function.
|
||||||
unsigned int sse;
|
unsigned int sse;
|
||||||
int i, rate_sum = 0, dist_sum = 0;
|
int i, rate_sum = 0;
|
||||||
|
int64_t dist_sum = 0;
|
||||||
|
|
||||||
for (i = 0; i < MAX_MB_PLANE; ++i) {
|
for (i = 0; i < MAX_MB_PLANE; ++i) {
|
||||||
struct macroblock_plane *const p = &x->plane[i];
|
struct macroblock_plane *const p = &x->plane[i];
|
||||||
@ -1973,7 +1962,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
|
|||||||
const int bw = plane_block_width(bsize, pd);
|
const int bw = plane_block_width(bsize, pd);
|
||||||
const int bh = plane_block_height(bsize, pd);
|
const int bh = plane_block_height(bsize, pd);
|
||||||
const enum BlockSize bs = get_block_size(bw, bh);
|
const enum BlockSize bs = get_block_size(bw, bh);
|
||||||
int rate, dist;
|
int rate;
|
||||||
|
int64_t dist;
|
||||||
cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
|
cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
|
||||||
pd->dst.buf, pd->dst.stride, &sse);
|
pd->dst.buf, pd->dst.stride, &sse);
|
||||||
model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
|
model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
|
||||||
@ -2238,9 +2228,10 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
BLOCK_SIZE_TYPE bsize,
|
BLOCK_SIZE_TYPE bsize,
|
||||||
int64_t txfm_cache[],
|
int64_t txfm_cache[],
|
||||||
int *rate2, int *distortion, int *skippable,
|
int *rate2, int64_t *distortion,
|
||||||
int *rate_y, int *distortion_y,
|
int *skippable,
|
||||||
int *rate_uv, int *distortion_uv,
|
int *rate_y, int64_t *distortion_y,
|
||||||
|
int *rate_uv, int64_t *distortion_uv,
|
||||||
int *mode_excluded, int *disable_skip,
|
int *mode_excluded, int *disable_skip,
|
||||||
INTERPOLATIONFILTERTYPE *best_filter,
|
INTERPOLATIONFILTERTYPE *best_filter,
|
||||||
int_mv *frame_mv,
|
int_mv *frame_mv,
|
||||||
@ -2344,7 +2335,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
*best_filter = EIGHTTAP;
|
*best_filter = EIGHTTAP;
|
||||||
} else {
|
} else {
|
||||||
int i, newbest;
|
int i, newbest;
|
||||||
int tmp_rate_sum = 0, tmp_dist_sum = 0;
|
int tmp_rate_sum = 0;
|
||||||
|
int64_t tmp_dist_sum = 0;
|
||||||
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
|
||||||
int rs = 0;
|
int rs = 0;
|
||||||
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
|
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
|
||||||
@ -2359,7 +2351,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
if (interpolating_intpel_seen && is_intpel_interp) {
|
if (interpolating_intpel_seen && is_intpel_interp) {
|
||||||
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
|
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
|
||||||
} else {
|
} else {
|
||||||
int rate_sum = 0, dist_sum = 0;
|
int rate_sum = 0;
|
||||||
|
int64_t dist_sum = 0;
|
||||||
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
|
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
|
||||||
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
|
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
|
||||||
rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
|
rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
|
||||||
@ -2503,19 +2496,20 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int *returnrate, int *returndist,
|
int *returnrate, int64_t *returndist,
|
||||||
BLOCK_SIZE_TYPE bsize,
|
BLOCK_SIZE_TYPE bsize,
|
||||||
PICK_MODE_CONTEXT *ctx) {
|
PICK_MODE_CONTEXT *ctx) {
|
||||||
VP9_COMMON *cm = &cpi->common;
|
VP9_COMMON *cm = &cpi->common;
|
||||||
MACROBLOCKD *xd = &x->e_mbd;
|
MACROBLOCKD *xd = &x->e_mbd;
|
||||||
int rate_y = 0, rate_uv;
|
int rate_y = 0, rate_uv = 0;
|
||||||
int rate_y_tokenonly = 0, rate_uv_tokenonly;
|
int rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
|
||||||
int dist_y = 0, dist_uv;
|
int64_t dist_y = 0, dist_uv = 0;
|
||||||
int y_skip = 0, uv_skip;
|
int y_skip = 0, uv_skip = 0;
|
||||||
int64_t txfm_cache[NB_TXFM_MODES], err;
|
int64_t txfm_cache[NB_TXFM_MODES], err;
|
||||||
MB_PREDICTION_MODE mode;
|
MB_PREDICTION_MODE mode;
|
||||||
TX_SIZE txfm_size;
|
TX_SIZE txfm_size;
|
||||||
int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y;
|
int rate4x4_y, rate4x4_y_tokenonly;
|
||||||
|
int64_t dist4x4_y;
|
||||||
int64_t err4x4 = INT64_MAX;
|
int64_t err4x4 = INT64_MAX;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -2566,7 +2560,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int mi_row, int mi_col,
|
int mi_row, int mi_col,
|
||||||
int *returnrate,
|
int *returnrate,
|
||||||
int *returndistortion,
|
int64_t *returndistortion,
|
||||||
BLOCK_SIZE_TYPE bsize,
|
BLOCK_SIZE_TYPE bsize,
|
||||||
PICK_MODE_CONTEXT *ctx) {
|
PICK_MODE_CONTEXT *ctx) {
|
||||||
VP9_COMMON *cm = &cpi->common;
|
VP9_COMMON *cm = &cpi->common;
|
||||||
@ -2601,7 +2595,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
|
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
|
||||||
INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
|
INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
|
||||||
int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
|
int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
|
||||||
int dist_uv[TX_SIZE_MAX_SB], skip_uv[TX_SIZE_MAX_SB];
|
int64_t dist_uv[TX_SIZE_MAX_SB];
|
||||||
|
int skip_uv[TX_SIZE_MAX_SB];
|
||||||
MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
|
MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB];
|
||||||
struct scale_factors scale_factor[4];
|
struct scale_factors scale_factor[4];
|
||||||
unsigned int ref_frame_mask = 0;
|
unsigned int ref_frame_mask = 0;
|
||||||
@ -2704,7 +2699,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
int disable_skip = 0;
|
int disable_skip = 0;
|
||||||
int compmode_cost = 0;
|
int compmode_cost = 0;
|
||||||
int rate2 = 0, rate_y = 0, rate_uv = 0;
|
int rate2 = 0, rate_y = 0, rate_uv = 0;
|
||||||
int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
|
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
|
||||||
int skippable;
|
int skippable;
|
||||||
int64_t txfm_cache[NB_TXFM_MODES];
|
int64_t txfm_cache[NB_TXFM_MODES];
|
||||||
int i;
|
int i;
|
||||||
@ -2891,11 +2886,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
|||||||
distortion2 = distortion_y + distortion_uv;
|
distortion2 = distortion_y + distortion_uv;
|
||||||
} else if (this_mode == SPLITMV) {
|
} else if (this_mode == SPLITMV) {
|
||||||
const int is_comp_pred = mbmi->ref_frame[1] > 0;
|
const int is_comp_pred = mbmi->ref_frame[1] > 0;
|
||||||
int rate, distortion;
|
int rate;
|
||||||
|
int64_t distortion;
|
||||||
int64_t this_rd_thresh;
|
int64_t this_rd_thresh;
|
||||||
int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
|
int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
|
||||||
int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
|
int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
|
||||||
int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
|
int64_t tmp_best_distortion = INT_MAX;
|
||||||
|
int tmp_best_skippable = 0;
|
||||||
int switchable_filter_index;
|
int switchable_filter_index;
|
||||||
int_mv *second_ref = is_comp_pred ?
|
int_mv *second_ref = is_comp_pred ?
|
||||||
&mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL;
|
&mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL;
|
||||||
|
@ -20,12 +20,12 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex);
|
|||||||
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
|
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
|
||||||
|
|
||||||
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int *r, int *d, BLOCK_SIZE_TYPE bsize,
|
int *r, int64_t *d, BLOCK_SIZE_TYPE bsize,
|
||||||
PICK_MODE_CONTEXT *ctx);
|
PICK_MODE_CONTEXT *ctx);
|
||||||
|
|
||||||
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
||||||
int mi_row, int mi_col,
|
int mi_row, int mi_col,
|
||||||
int *r, int *d, BLOCK_SIZE_TYPE bsize,
|
int *r, int64_t *d, BLOCK_SIZE_TYPE bsize,
|
||||||
PICK_MODE_CONTEXT *ctx);
|
PICK_MODE_CONTEXT *ctx);
|
||||||
|
|
||||||
void vp9_init_me_luts();
|
void vp9_init_me_luts();
|
||||||
|
@ -1,125 +0,0 @@
|
|||||||
;
|
|
||||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
|
||||||
;
|
|
||||||
; Use of this source code is governed by a BSD-style license
|
|
||||||
; that can be found in the LICENSE file in the root of the source
|
|
||||||
; tree. An additional intellectual property rights grant can be found
|
|
||||||
; in the file PATENTS. All contributing project authors may
|
|
||||||
; be found in the AUTHORS file in the root of the source tree.
|
|
||||||
;
|
|
||||||
|
|
||||||
|
|
||||||
%include "vpx_ports/x86_abi_support.asm"
|
|
||||||
|
|
||||||
;int vp9_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
|
|
||||||
global sym(vp9_block_error_xmm) PRIVATE
|
|
||||||
sym(vp9_block_error_xmm):
|
|
||||||
push rbp
|
|
||||||
mov rbp, rsp
|
|
||||||
SHADOW_ARGS_TO_STACK 2
|
|
||||||
push rsi
|
|
||||||
push rdi
|
|
||||||
; end prologue
|
|
||||||
|
|
||||||
mov rsi, arg(0) ;coeff_ptr
|
|
||||||
mov rdi, arg(1) ;dcoef_ptr
|
|
||||||
|
|
||||||
movdqa xmm0, [rsi]
|
|
||||||
movdqa xmm1, [rdi]
|
|
||||||
|
|
||||||
movdqa xmm2, [rsi+16]
|
|
||||||
movdqa xmm3, [rdi+16]
|
|
||||||
|
|
||||||
psubw xmm0, xmm1
|
|
||||||
psubw xmm2, xmm3
|
|
||||||
|
|
||||||
pmaddwd xmm0, xmm0
|
|
||||||
pmaddwd xmm2, xmm2
|
|
||||||
|
|
||||||
paddd xmm0, xmm2
|
|
||||||
|
|
||||||
pxor xmm5, xmm5
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
|
|
||||||
punpckldq xmm0, xmm5
|
|
||||||
punpckhdq xmm1, xmm5
|
|
||||||
|
|
||||||
paddd xmm0, xmm1
|
|
||||||
movdqa xmm1, xmm0
|
|
||||||
|
|
||||||
psrldq xmm0, 8
|
|
||||||
paddd xmm0, xmm1
|
|
||||||
|
|
||||||
movq rax, xmm0
|
|
||||||
|
|
||||||
pop rdi
|
|
||||||
pop rsi
|
|
||||||
; begin epilog
|
|
||||||
UNSHADOW_ARGS
|
|
||||||
pop rbp
|
|
||||||
ret
|
|
||||||
|
|
||||||
;int vp9_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
|
|
||||||
global sym(vp9_block_error_mmx) PRIVATE
|
|
||||||
sym(vp9_block_error_mmx):
|
|
||||||
push rbp
|
|
||||||
mov rbp, rsp
|
|
||||||
SHADOW_ARGS_TO_STACK 2
|
|
||||||
push rsi
|
|
||||||
push rdi
|
|
||||||
; end prolog
|
|
||||||
|
|
||||||
|
|
||||||
mov rsi, arg(0) ;coeff_ptr
|
|
||||||
pxor mm7, mm7
|
|
||||||
|
|
||||||
mov rdi, arg(1) ;dcoef_ptr
|
|
||||||
movq mm3, [rsi]
|
|
||||||
|
|
||||||
movq mm4, [rdi]
|
|
||||||
movq mm5, [rsi+8]
|
|
||||||
|
|
||||||
movq mm6, [rdi+8]
|
|
||||||
pxor mm1, mm1 ; from movd mm1, dc ; dc =0
|
|
||||||
|
|
||||||
movq mm2, mm7
|
|
||||||
psubw mm5, mm6
|
|
||||||
|
|
||||||
por mm1, mm2
|
|
||||||
pmaddwd mm5, mm5
|
|
||||||
|
|
||||||
pcmpeqw mm1, mm7
|
|
||||||
psubw mm3, mm4
|
|
||||||
|
|
||||||
pand mm1, mm3
|
|
||||||
pmaddwd mm1, mm1
|
|
||||||
|
|
||||||
paddd mm1, mm5
|
|
||||||
movq mm3, [rsi+16]
|
|
||||||
|
|
||||||
movq mm4, [rdi+16]
|
|
||||||
movq mm5, [rsi+24]
|
|
||||||
|
|
||||||
movq mm6, [rdi+24]
|
|
||||||
psubw mm5, mm6
|
|
||||||
|
|
||||||
pmaddwd mm5, mm5
|
|
||||||
psubw mm3, mm4
|
|
||||||
|
|
||||||
pmaddwd mm3, mm3
|
|
||||||
paddd mm3, mm5
|
|
||||||
|
|
||||||
paddd mm1, mm3
|
|
||||||
movq mm0, mm1
|
|
||||||
|
|
||||||
psrlq mm1, 32
|
|
||||||
paddd mm0, mm1
|
|
||||||
|
|
||||||
movq rax, mm0
|
|
||||||
|
|
||||||
pop rdi
|
|
||||||
pop rsi
|
|
||||||
; begin epilog
|
|
||||||
UNSHADOW_ARGS
|
|
||||||
pop rbp
|
|
||||||
ret
|
|
57
vp9/encoder/x86/vp9_error_sse2.asm
Normal file
57
vp9/encoder/x86/vp9_error_sse2.asm
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
;
|
||||||
|
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||||
|
;
|
||||||
|
; Use of this source code is governed by a BSD-style license
|
||||||
|
; that can be found in the LICENSE file in the root of the source
|
||||||
|
; tree. An additional intellectual property rights grant can be found
|
||||||
|
; in the file PATENTS. All contributing project authors may
|
||||||
|
; be found in the AUTHORS file in the root of the source tree.
|
||||||
|
;
|
||||||
|
|
||||||
|
%include "third_party/x86inc/x86inc.asm"
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size)
|
||||||
|
|
||||||
|
INIT_XMM sse2
|
||||||
|
cglobal block_error, 3, 3, 6, uqc, dqc, size
|
||||||
|
pxor m4, m4 ; accumulator
|
||||||
|
pxor m5, m5 ; dedicated zero register
|
||||||
|
lea uqcq, [uqcq+sizeq*2]
|
||||||
|
lea dqcq, [dqcq+sizeq*2]
|
||||||
|
neg sizeq
|
||||||
|
.loop:
|
||||||
|
mova m0, [uqcq+sizeq*2]
|
||||||
|
mova m2, [dqcq+sizeq*2]
|
||||||
|
mova m1, [uqcq+sizeq*2+mmsize]
|
||||||
|
mova m3, [dqcq+sizeq*2+mmsize]
|
||||||
|
psubw m0, m2
|
||||||
|
psubw m1, m3
|
||||||
|
; individual errors are max. 15bit+sign, so squares are 30bit, and
|
||||||
|
; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
|
||||||
|
pmaddwd m0, m0
|
||||||
|
pmaddwd m1, m1
|
||||||
|
; accumulate in 64bit
|
||||||
|
punpckldq m2, m0, m5
|
||||||
|
punpckhdq m0, m5
|
||||||
|
punpckldq m3, m1, m5
|
||||||
|
punpckhdq m1, m5
|
||||||
|
paddq m4, m2
|
||||||
|
paddq m4, m0
|
||||||
|
paddq m4, m3
|
||||||
|
paddq m4, m1
|
||||||
|
add sizeq, mmsize
|
||||||
|
jl .loop
|
||||||
|
|
||||||
|
; accumulate horizontally and store in return value
|
||||||
|
movhlps m5, m4
|
||||||
|
paddq m4, m5
|
||||||
|
%if ARCH_X86_64
|
||||||
|
movq rax, m4
|
||||||
|
%else
|
||||||
|
pshufd m5, m4, 0x1
|
||||||
|
movd eax, m4
|
||||||
|
movd edx, m5
|
||||||
|
%endif
|
||||||
|
RET
|
@ -85,12 +85,12 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
|
|||||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
|
||||||
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
|
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
|
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
|
||||||
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
|
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
|
||||||
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm
|
|
||||||
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
|
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
|
||||||
|
|
||||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
|
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
|
||||||
|
Loading…
x
Reference in New Issue
Block a user