Merge branch 'master' into nextgenv2

Change-Id: Id0b784b115602e2502b42fa972a5ae210435a3be
This commit is contained in:
Yaowu Xu
2015-12-11 08:57:43 -08:00
19 changed files with 414 additions and 165 deletions

View File

@@ -29,11 +29,14 @@ SCRIPT_DIR=$(dirname "$0")
LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
ORIG_PWD="$(pwd)"
TARGETS="arm64-darwin-gcc
armv7-darwin-gcc
armv7s-darwin-gcc
x86-iphonesimulator-gcc
x86_64-iphonesimulator-gcc"
ARM_TARGETS="arm64-darwin-gcc
armv7-darwin-gcc
armv7s-darwin-gcc"
SIM_TARGETS="x86-iphonesimulator-gcc
x86_64-iphonesimulator-gcc"
OSX_TARGETS="x86-darwin15-gcc
x86_64-darwin15-gcc"
TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"
# Configures for the target specified by $1, and invokes make with the dist
# target using $DIST_DIR as the distribution output directory.
@@ -197,15 +200,27 @@ cleanup() {
fi
}
# Prints every entry of a list on its own line.
#   $1:    prefix written in front of each entry (may be empty).
#   $2...: the entries. All remaining arguments are joined and then re-split on
#          whitespace, so separate arguments and a single space- or
#          newline-separated string are both accepted.
print_list() {
  local indent="$1"
  shift
  # "$*" makes the join into one string explicit. Assigning "$@" to a scalar
  # relies on shell-specific handling of declaration arguments (ShellCheck
  # SC2124).
  local list="$*"
  for entry in ${list}; do
    echo "${indent}${entry}"
  done
}
iosbuild_usage() {
cat << EOF
Usage: ${0##*/} [arguments]
--help: Display this message and exit.
--extra-configure-args <args>: Extra args to pass when configuring libvpx.
--macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
and x86_64. Allows linking to framework when builds target MacOSX
instead of iOS.
--preserve-build-output: Do not delete the build directory.
--show-build-output: Show output from each library build.
--targets <targets>: Override default target list. Defaults:
${TARGETS}
$(print_list " " ${TARGETS})
--test-link: Confirms all targets can be linked. Functionally identical to
passing --enable-examples via --extra-configure-args.
--verbose: Output information about the environment and each stage of the
@@ -249,6 +264,9 @@ while [ -n "$1" ]; do
TARGETS="$2"
shift
;;
--macosx)
TARGETS="${ARM_TARGETS} ${OSX_TARGETS}"
;;
--verbose)
VERBOSE=yes
;;
@@ -273,10 +291,12 @@ cat << EOF
MAKEFLAGS=${MAKEFLAGS}
ORIG_PWD=${ORIG_PWD}
PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
TARGETS="${TARGETS}"
TARGETS="$(print_list "" ${TARGETS})"
OSX_TARGETS="${OSX_TARGETS}"
SIM_TARGETS="${SIM_TARGETS}"
EOF
fi
build_framework "${TARGETS}"
echo "Successfully built '${FRAMEWORK_DIR}' for:"
echo " ${TARGETS}"
print_list "" ${TARGETS}

View File

@@ -119,7 +119,7 @@
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
%define GET_GOT_SAVE_ARG 1
%define GET_GOT_DEFINED 1
%define WRT_PLT wrt ..plt
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
@@ -138,7 +138,7 @@
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
%define GET_GOT_SAVE_ARG 1
%define GET_GOT_DEFINED 1
%macro GET_GOT 1
push %1
call %%get_got
@@ -149,6 +149,8 @@
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
%else
%define GET_GOT_DEFINED 0
%endif
%endif

View File

@@ -1230,10 +1230,12 @@ static void setup_quantization(VP10_COMMON *const cm, MACROBLOCKD *const xd,
cm->uv_dc_delta_q = read_delta_q(rb);
cm->uv_ac_delta_q = read_delta_q(rb);
cm->dequant_bit_depth = cm->bit_depth;
for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
xd->lossless[i] = cm->y_dc_delta_q == 0 &&
qindex == 0 &&
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ?
vp10_get_qindex(&cm->seg, i, cm->base_qindex) :
cm->base_qindex;
xd->lossless[i] = qindex == 0 &&
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
}

View File

@@ -1922,7 +1922,7 @@ void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size) {
assert(n_log2_tiles > 0);
vpx_wb_write_literal(&saved_wb, mag, 2);
if (mag < 3)
data_sz = remux_tiles(data, data_sz, 1 << n_log2_tiles, mag);
data_sz = remux_tiles(data, (int)data_sz, 1 << n_log2_tiles, mag);
} else {
assert(n_log2_tiles == 0);
}

View File

@@ -84,6 +84,8 @@ struct macroblock {
int rddiv;
int rdmult;
int mb_energy;
int * m_search_count_ptr;
int * ex_search_count_ptr;
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.

View File

@@ -1181,7 +1181,7 @@ static void rd_pick_sb_modes(VP10_COMP *cpi,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
x->source_variance =
vp10_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
bsize, xd->bd);
bsize, xd->bd);
} else {
x->source_variance =
vp10_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
@@ -2776,7 +2776,7 @@ static MV_REFERENCE_FRAME get_frame_type(const VP10_COMP *cpi) {
}
static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
if (!cpi->common.seg.enabled && xd->lossless[0])
if (xd->lossless[0])
return ONLY_4X4;
if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
return ALLOW_32X32;
@@ -2839,6 +2839,10 @@ void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td,
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
int mi_row;
// Set up pointers to per thread motion search counters.
td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
@@ -2892,11 +2896,15 @@ static void encode_frame_internal(VP10_COMP *cpi) {
vp10_zero(rdc->coef_counts);
vp10_zero(rdc->comp_pred_diff);
vp10_zero(rdc->filter_diff);
rdc->m_search_count = 0; // Count of motion search hits.
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
for (i = 0; i < (cm->seg.enabled ? MAX_SEGMENTS : 1); ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
xd->lossless[i] = cm->y_dc_delta_q == 0 &&
qindex == 0 &&
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = CONFIG_MISC_FIXES && cm->seg.enabled ?
vp10_get_qindex(&cm->seg, i, cm->base_qindex) :
cm->base_qindex;
xd->lossless[i] = qindex == 0 &&
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
}

View File

@@ -2968,7 +2968,7 @@ static void output_frame_level_debug_stats(VP10_COMP *cpi) {
recon_err = vp10_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
"%10"PRId64" %10"PRId64" %10d "
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
@@ -2977,6 +2977,8 @@ static void output_frame_level_debug_stats(VP10_COMP *cpi) {
"%10lf %8u %10"PRId64" %10d %10d %10d\n",
cpi->common.current_video_frame,
cm->width, cm->height,
cpi->td.rd_counts.m_search_count,
cpi->td.rd_counts.ex_search_count,
cpi->rc.source_alt_ref_pending,
cpi->rc.source_alt_ref_active,
cpi->rc.this_frame_target,

View File

@@ -246,6 +246,8 @@ typedef struct RD_COUNTS {
vp10_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
int m_search_count;
int ex_search_count;
} RD_COUNTS;
typedef struct ThreadData {

View File

@@ -30,6 +30,11 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
for (n = 0; n < ENTROPY_TOKENS; n++)
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
// Counts of all motion searches and exhaustive mesh searches.
td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
}
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {

View File

@@ -1523,69 +1523,83 @@ int vp10_fast_dia_search(const MACROBLOCK *x,
#undef CHECK_BETTER
int vp10_full_range_search_c(const MACROBLOCK *x,
const search_site_config *cfg,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv) {
// Exhaustive motion search around a given centre position with a given
// step size.
static int exhuastive_mesh_search(const MACROBLOCK *x,
MV *ref_mv, MV *best_mv,
int range, int step, int sad_per_bit,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const int range = 64;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
MV fcenter_mv = {center_mv->row, center_mv->col};
unsigned int best_sad = INT_MAX;
int r, c, i;
int start_col, end_col, start_row, end_row;
int col_step = (step > 1) ? step : 4;
// The cfg and search_param parameters are not used in this search variant
(void)cfg;
(void)search_param;
assert(step >= 1);
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
*best_mv = *ref_mv;
*num00 = 11;
clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
*best_mv = fcenter_mv;
best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row);
start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col);
end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row);
end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col);
for (r = start_row; r <= end_row; ++r) {
for (c = start_col; c <= end_col; c += 4) {
if (c + 3 <= end_col) {
unsigned int sads[4];
const uint8_t *addrs[4];
for (i = 0; i < 4; ++i) {
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
addrs[i] = get_buf_from_mv(in_what, &mv);
}
fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
for (i = 0; i < 4; ++i) {
if (sads[i] < best_sad) {
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
const unsigned int sad = sads[i] +
mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
for (r = start_row; r <= end_row; r += step) {
for (c = start_col; c <= end_col; c += col_step) {
// Step > 1 means we are not checking every location in this pass.
if (step > 1) {
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c};
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
} else {
for (i = 0; i < end_col - c; ++i) {
const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
// 4 sads in a single call if we are checking every location
if (c + 3 <= end_col) {
unsigned int sads[4];
const uint8_t *addrs[4];
for (i = 0; i < 4; ++i) {
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
addrs[i] = get_buf_from_mv(in_what, &mv);
}
fn_ptr->sdx4df(what->buf, what->stride, addrs,
in_what->stride, sads);
for (i = 0; i < 4; ++i) {
if (sads[i] < best_sad) {
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
const unsigned int sad = sads[i] +
mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
}
} else {
for (i = 0; i < end_col - c; ++i) {
const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
}
}
@@ -2014,6 +2028,70 @@ int vp10_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
return bestsme;
}
#define MIN_RANGE 7
#define MAX_RANGE 256
#define MIN_INTERVAL 1
// Runs a limited-range exhaustive mesh search using the mesh pattern selected
// by the encode speed profile (sf->mesh_patterns).
//
// centre_mv_full is the starting vector for the search, ref_mv is the
// reference vector (shifted right by 3 before it is handed to the mesh
// search) and sadpb is the SAD-per-bit weighting. The best vector found is
// written to *dst_mv; when cost_list is non-NULL calc_int_cost_list() is
// called for that vector. Returns INT_MAX, without writing *dst_mv, if the
// first pattern entry holds an out-of-bounds range or interval.
static int full_pixel_exhaustive(VP10_COMP *cpi, MACROBLOCK *x,
                                 MV *centre_mv_full, int sadpb, int *cost_list,
                                 const vp9_variance_fn_ptr_t *fn_ptr,
                                 const MV *ref_mv, MV *dst_mv) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MV best_full_mv = {centre_mv_full->row, centre_mv_full->col};
  MV ref_full_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
  int first_range = sf->mesh_patterns[0].range;
  int first_step = sf->mesh_patterns[0].interval;
  int range_to_step_ratio;
  int centre_mag;
  int err;
  int pass;

  // Count this call (this frame, this thread), even if it bails out below.
  *x->ex_search_count_ptr += 1;

  // Reject range / interval values this function cannot work with.
  if (first_range < MIN_RANGE || first_range > MAX_RANGE ||
      first_step < MIN_INTERVAL || first_step > first_range)
    return INT_MAX;

  range_to_step_ratio = first_range / first_step;

  // Grow the first pass so that it covers 5/4 of the magnitude of the
  // starting vector (capped at MAX_RANGE), and grow the step so that the
  // range / step ratio does not exceed the one the pattern asked for.
  centre_mag = VPXMAX(abs(best_full_mv.row), abs(best_full_mv.col));
  first_range = VPXMAX(first_range, (5 * centre_mag) / 4);
  first_range = VPXMIN(first_range, MAX_RANGE);
  first_step = VPXMAX(first_step, first_range / range_to_step_ratio);

  // Initial pass, centred on the starting vector.
  err = exhuastive_mesh_search(x, &ref_full_mv, &best_full_mv, first_range,
                               first_step, sadpb, fn_ptr, &best_full_mv);

  // Follow-up passes walk the remaining pattern entries, each centred on the
  // best vector so far, and stop after the first entry whose interval is 1.
  if (first_step > MIN_INTERVAL && first_range > MIN_RANGE) {
    pass = 1;
    while (pass < MAX_MESH_STEP) {
      const int pass_range = sf->mesh_patterns[pass].range;
      const int pass_step = sf->mesh_patterns[pass].interval;

      err = exhuastive_mesh_search(x, &ref_full_mv, &best_full_mv, pass_range,
                                   pass_step, sadpb, fn_ptr, &best_full_mv);
      if (pass_step == 1)
        break;
      ++pass;
    }
  }

  // Replace the search score by the prediction variance of the winner.
  if (err < INT_MAX)
    err = vp10_get_mvpred_var(x, &best_full_mv, ref_mv, fn_ptr, 1);
  *dst_mv = best_full_mv;

  // Fill in the cost list if the caller asked for one.
  if (cost_list)
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);

  return err;
}
int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -2327,6 +2405,18 @@ int vp10_refining_search_8p_c(const MACROBLOCK *x,
return best_sad;
}
#define MIN_EX_SEARCH_LIMIT 128
// Returns 1 when a follow-on exhaustive search may be run, 0 otherwise.
// The search is permitted only if the speed features enable it, the
// threshold is not INT_MAX, the source frame is not an alt-ref, and the
// exhaustive-search count is still within budget. The budget is
// max_exaustive_pct percent of the motion searches counted so far, but never
// less than MIN_EX_SEARCH_LIMIT.
static int is_exhaustive_allowed(VP10_COMP *cpi, MACROBLOCK *x) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const int pct_budget =
      (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100;
  const int budget = VPXMAX(MIN_EX_SEARCH_LIMIT, pct_budget);

  if (!sf->allow_exhaustive_searches)
    return 0;
  if (sf->exhaustive_searches_thresh >= INT_MAX)
    return 0;
  if (*x->ex_search_count_ptr > budget)
    return 0;
  return !cpi->rc.is_src_frame_alt_ref;
}
int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full,
int step_param, int error_per_bit,
@@ -2345,6 +2435,9 @@ int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x,
cost_list[4] = INT_MAX;
}
// Keep track of number of searches (this frame in this thread).
++(*x->m_search_count_ptr);
switch (method) {
case FAST_DIAMOND:
var = vp10_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
@@ -2370,6 +2463,29 @@ int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x,
var = vp10_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
MAX_MVSEARCH_STEPS - 1 - step_param,
1, cost_list, fn_ptr, ref_mv, tmp_mv);
// Should we allow a follow on exhaustive search?
if (is_exhaustive_allowed(cpi, x)) {
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] +
b_height_log2_lookup[bsize]);
// Threshold variance for an exhaustive full search.
if (var > exhuastive_thr) {
int var_ex;
MV tmp_mv_ex;
var_ex = full_pixel_exhaustive(cpi, x, tmp_mv,
error_per_bit, cost_list, fn_ptr,
ref_mv, &tmp_mv_ex);
if (var_ex < var) {
var = var_ex;
*tmp_mv = tmp_mv_ex;
}
}
}
break;
break;
default:
assert(0 && "Invalid search method.");

View File

@@ -3607,7 +3607,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
int step_param = 0;
int thissme, bestsme = INT_MAX;
int bestsme = INT_MAX;
int sadpb = x->sadperbit4;
MV mvp_full;
int max_mv;
@@ -3662,27 +3662,6 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
&bsi->ref_mv[0]->as_mv, new_mv,
INT_MAX, 1);
// Should we do a full search (best quality only)
if (cpi->oxcf.mode == BEST) {
int_mv *const best_mv = &mi->bmi[i].as_mv[0];
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, &mvp_full,
sadpb, 16, &cpi->fn_ptr[bsize],
&bsi->ref_mv[0]->as_mv,
&best_mv->as_mv);
cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
if (thissme < bestsme) {
bestsme = thissme;
*new_mv = best_mv->as_mv;
} else {
// The full search result is actually worse so re-instate the
// previous best vector
best_mv->as_mv = *new_mv;
}
}
if (bestsme < INT_MAX) {
int distortion;
cpi->find_fractional_mv_step(

View File

@@ -16,6 +16,23 @@
#include "vpx_dsp/vpx_dsp_common.h"
// Mesh search patterns for various speed settings.
// Each MESH_PATTERN entry is {range, interval}; a pattern holds MAX_MESH_STEP
// entries, one per search pass.
// Pattern copied into sf->mesh_patterns when oxcf->mode == BEST.
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] =
{{64, 4}, {28, 2}, {15, 1}, {7, 1}};
#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
// Patterns for the non-BEST modes, indexed by oxcf->speed clamped to
// MAX_MESH_SPEED.
static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1]
[MAX_MESH_STEP] =
{{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
{{64, 8}, {14, 2}, {7, 1}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
{{64, 16}, {24, 8}, {12, 4}, {7, 1}},
};
// Value assigned to sf->max_exaustive_pct for the non-BEST modes, indexed by
// the same clamped speed.
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] =
{50, 25, 15, 5, 1, 1};
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const VP10_COMP *cpi) {
@@ -251,6 +268,8 @@ static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf,
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->use_fast_coef_costing = 1;
sf->allow_exhaustive_searches = 0;
sf->exhaustive_searches_thresh = INT_MAX;
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
@@ -498,8 +517,36 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
cpi->full_search_sad = vp10_full_search_sad;
cpi->diamond_search_sad = oxcf->mode == BEST ? vp10_full_range_search
: vp10_diamond_search_sad;
cpi->diamond_search_sad = vp10_diamond_search_sad;
sf->allow_exhaustive_searches = 1;
if (oxcf->mode == BEST) {
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
sf->exhaustive_searches_thresh = (1 << 20);
else
sf->exhaustive_searches_thresh = (1 << 21);
sf->max_exaustive_pct = 100;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
}
} else {
int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
sf->exhaustive_searches_thresh = (1 << 22);
else
sf->exhaustive_searches_thresh = (1 << 23);
sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
if (speed > 0)
sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mesh_patterns[i].range =
good_quality_mesh_patterns[speed][i].range;
sf->mesh_patterns[i].interval =
good_quality_mesh_patterns[speed][i].interval;
}
}
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.

View File

@@ -195,6 +195,13 @@ typedef struct MV_SPEED_FEATURES {
int fullpel_search_step_param;
} MV_SPEED_FEATURES;
// Number of entries in a mesh search pattern (length of the mesh_patterns
// arrays).
#define MAX_MESH_STEP 4
// Parameters for one pass of an exhaustive mesh search.
typedef struct MESH_PATTERN {
// Search extent passed to the mesh search as its range.
int range;
// Spacing passed to the mesh search as its step size.
int interval;
} MESH_PATTERN;
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
@@ -290,6 +297,18 @@ typedef struct SPEED_FEATURES {
// point for this motion search and limits the search range around it.
int adaptive_motion_search;
// Flag for allowing some use of exhaustive searches.
int allow_exhaustive_searches;
// Threshold for allowing exhaustive motion search.
int exhaustive_searches_thresh;
// Maximum number of exhaustive searches for a frame, expressed as a
// percentage of the motion searches counted so far.
int max_exaustive_pct;
// Pattern to be used for any exhaustive mesh searches.
MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
int schedule_mode_search;
// Allows sub 8x8 modes to use the prediction filter that was determined

View File

@@ -1075,7 +1075,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.

View File

@@ -135,15 +135,38 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
int src_byte = frame1[byte];
int pixel_value = *frame2++;
int pixel_value = *frame2;
// non-local mean approach
int diff_sse[9] = { 0 };
int idx, idy, index = 0;
for (idy = -1; idy <= 1; ++idy) {
for (idx = -1; idx <= 1; ++idx) {
int row = i + idy;
int col = j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
int diff = frame1[byte + idy * (int)stride + idx] -
frame2[idy * (int)block_width + idx];
diff_sse[index] = diff * diff;
++index;
}
}
}
assert(index > 0);
modifier = 0;
for (idx = 0; idx < 9; ++idx)
modifier += diff_sse[idx];
modifier *= 3;
modifier /= index;
++frame2;
modifier = src_byte - pixel_value;
// This is an integer approximation of:
// float coeff = (3.0 * modifier * modifier) / pow(2, strength);
// modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
modifier *= modifier;
modifier *= 3;
modifier += rounding;
modifier >>= strength;
@@ -182,15 +205,34 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
int src_byte = frame1[byte];
int pixel_value = *frame2++;
int pixel_value = *frame2;
int diff_sse[9] = { 0 };
int idx, idy, index = 0;
for (idy = -1; idy <= 1; ++idy) {
for (idx = -1; idx <= 1; ++idx) {
int row = i + idy;
int col = j + idx;
if (row >= 0 && row < (int)block_height &&
col >= 0 && col < (int)block_width) {
int diff = frame1[byte + idy * (int)stride + idx] -
frame2[idy * (int)block_width + idx];
diff_sse[index] = diff * diff;
++index;
}
}
}
assert(index > 0);
modifier = 0;
for (idx = 0; idx < 9; ++idx)
modifier += diff_sse[idx];
modifier = src_byte - pixel_value;
// This is an integer approximation of:
// float coeff = (3.0 * modifier * modifier) / pow(2, strength);
// modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
modifier *= modifier;
modifier *= 3;
modifier /= index;
++frame2;
modifier += rounding;
modifier >>= strength;
@@ -383,55 +425,58 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
vp9_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset,
f->y_stride,
predictor, 16, 16, adj_strength,
filter_weight,
accumulator, count);
vp9_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset,
f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height,
adj_strength,
filter_weight, accumulator + 256,
count + 256);
vp9_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height,
adj_strength, filter_weight,
accumulator + 512, count + 512);
vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset,
f->y_stride,
predictor, 16, 16, adj_strength,
filter_weight,
accumulator, count);
vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
f->uv_stride, predictor + 256,
mb_uv_width, mb_uv_height,
adj_strength,
filter_weight, accumulator + 256,
count + 256);
vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height,
adj_strength, filter_weight,
accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
}
#else
// Apply the filter (YUV)
// TODO(jingning): Need SIMD optimization for this.
vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
}
#else
// Apply the filter (YUV)
vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}

View File

@@ -123,8 +123,10 @@ SECTION .text
%define sec_str sec_stridemp
; Store bilin_filter and pw_8 location in stack
GET_GOT eax
add esp, 4 ; restore esp
%if GET_GOT_DEFINED == 1
GET_GOT eax
add esp, 4 ; restore esp
%endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -140,8 +142,10 @@ SECTION .text
%define block_height heightd
; Store bilin_filter and pw_8 location in stack
GET_GOT eax
add esp, 4 ; restore esp
%if GET_GOT_DEFINED == 1
GET_GOT eax
add esp, 4 ; restore esp
%endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx

View File

@@ -47,9 +47,9 @@ cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
INIT_XMM sse2
cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset
movifnidn leftq, leftmp
GET_GOT goffsetq
movifnidn leftq, leftmp
pxor m1, m1
movd m0, [leftq]
psadbw m0, m1
@@ -143,9 +143,9 @@ cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset
INIT_XMM sse2
cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset
movifnidn leftq, leftmp
GET_GOT goffsetq
movifnidn leftq, leftmp
pxor m1, m1
movq m0, [leftq]
DEFINE_ARGS dst, stride, stride3
@@ -239,14 +239,11 @@ cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
pxor m2, m2
mova m0, [aboveq]
DEFINE_ARGS dst, stride, stride3, lines4
lea stride3q, [strideq*3]
mov lines4d, 4
psadbw m0, m1
psadbw m2, m1
paddw m0, m2
movhlps m2, m0
paddw m0, m2
paddw m0, [GLOBAL(pw2_16)]
@@ -271,14 +268,11 @@ cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
pxor m2, m2
mova m0, [leftq]
DEFINE_ARGS dst, stride, stride3, lines4
lea stride3q, [strideq*3]
mov lines4d, 4
psadbw m0, m1
psadbw m2, m1
paddw m0, m2
movhlps m2, m0
paddw m0, m2
paddw m0, [GLOBAL(pw2_16)]

View File

@@ -139,8 +139,10 @@ SECTION .text
%define sec_str sec_stridemp
;Store bilin_filter and pw_8 location in stack
GET_GOT eax
add esp, 4 ; restore esp
%if GET_GOT_DEFINED == 1
GET_GOT eax
add esp, 4 ; restore esp
%endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -156,8 +158,10 @@ SECTION .text
%define block_height heightd
;Store bilin_filter and pw_8 location in stack
GET_GOT eax
add esp, 4 ; restore esp
%if GET_GOT_DEFINED == 1
GET_GOT eax
add esp, 4 ; restore esp
%endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx

View File

@@ -189,7 +189,6 @@
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
%define GET_GOT_SAVE_ARG 1
%define WRT_PLT wrt ..plt
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
@@ -208,7 +207,6 @@
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
%define GET_GOT_SAVE_ARG 1
%macro GET_GOT 1
push %1
call %%get_got