Merge branch 'master' into nextgenv2

This commit is contained in:
Yaowu Xu
2016-01-30 05:00:05 -08:00
25 changed files with 382 additions and 164 deletions

View File

@@ -647,7 +647,7 @@ process_common_toolchain() {
armv6*) armv6*)
tgt_isa=armv6 tgt_isa=armv6
;; ;;
armv7*-hardfloat*) armv7*-hardfloat* | armv7*-gnueabihf | arm-*-gnueabihf)
tgt_isa=armv7 tgt_isa=armv7
float_abi=hard float_abi=hard
;; ;;
@@ -877,7 +877,6 @@ process_common_toolchain() {
case ${tgt_cc} in case ${tgt_cc} in
gcc) gcc)
CROSS=${CROSS:-arm-none-linux-gnueabi-}
link_with_cc=gcc link_with_cc=gcc
setup_gnu_toolchain setup_gnu_toolchain
arch_int=${tgt_isa##armv} arch_int=${tgt_isa##armv}
@@ -1135,7 +1134,7 @@ EOF
CC=${CC:-${CROSS}gcc} CC=${CC:-${CROSS}gcc}
CXX=${CXX:-${CROSS}g++} CXX=${CXX:-${CROSS}g++}
LD=${LD:-${CROSS}gcc} LD=${LD:-${CROSS}gcc}
CROSS=${CROSS:-g} CROSS=${CROSS-g}
;; ;;
os2) os2)
disable_feature pic disable_feature pic

68
test/encode_api_test.cc Normal file
View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
namespace {

// Element count of a statically sized array, expressed as an int.
#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))

// Exercises the public encoder entry points with NULL or otherwise invalid
// arguments and expects VPX_CODEC_INVALID_PARAM from each of them. Then, for
// every encoder interface enabled in vpx_config.h, checks that a default
// configuration can be fetched, used to initialize an encoder, handed an
// encode call with a NULL image, and destroyed, all returning VPX_CODEC_OK.
TEST(EncodeAPI, InvalidParams) {
  static const vpx_codec_iface_t *kCodecs[] = {
#if CONFIG_VP8_ENCODER
    &vpx_codec_vp8_cx_algo,
#endif
#if CONFIG_VP9_ENCODER
    &vpx_codec_vp9_cx_algo,
#endif
#if CONFIG_VP10_ENCODER
    &vpx_codec_vp10_cx_algo,
#endif
  };
  const int kNumCodecs = NELEMENTS(kCodecs);

  vpx_codec_enc_cfg_t cfg;
  vpx_codec_ctx_t enc;
  vpx_image_t img;
  uint8_t buf[1] = {0};

  // Wrap a 1x1 I420 image around the one-byte buffer so that a non-NULL
  // image is available for the calls below.
  EXPECT_EQ(&img, vpx_img_wrap(&img, VPX_IMG_FMT_I420, 1, 1, 1, buf));

  // Calls made without any codec interface.
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, NULL, NULL, 0));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, NULL, NULL, 0));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, NULL, 0, 0, 0, 0));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, &img, 0, 0, 0, 0));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_codec_enc_config_default(NULL, NULL, 0));
  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
            vpx_codec_enc_config_default(NULL, &cfg, 0));
  EXPECT_TRUE(vpx_codec_error(NULL) != NULL);

  for (int codec_idx = 0; codec_idx < kNumCodecs; ++codec_idx) {
    const vpx_codec_iface_t *const iface = kCodecs[codec_idx];
    // Tag any failure below with the name of the interface under test.
    SCOPED_TRACE(vpx_codec_iface_name(iface));

    // A valid interface is not enough: a NULL context or a NULL config
    // must still be rejected, as must a third argument of 1 when
    // requesting the default config.
    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
              vpx_codec_enc_init(NULL, iface, NULL, 0));
    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
              vpx_codec_enc_init(&enc, iface, NULL, 0));
    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
              vpx_codec_enc_config_default(iface, &cfg, 1));

    // Valid sequence: default config, init, encode with no image, destroy.
    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(iface, &cfg, 0));
    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, iface, &cfg, 0));
    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, NULL, 0, 0, 0, 0));
    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
  }
}

}  // namespace

View File

@@ -562,6 +562,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm

View File

@@ -550,6 +550,8 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f *vp90-2-03-size-226x202.webm
83c6d8f2969b759e10e5c6542baca1265c874c29 *vp90-2-03-size-226x224.webm.md5 83c6d8f2969b759e10e5c6542baca1265c874c29 *vp90-2-03-size-226x224.webm.md5
fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce *vp90-2-03-size-226x226.webm fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce *vp90-2-03-size-226x226.webm
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 *vp90-2-03-size-226x226.webm.md5 94ad19b8b699cea105e2ff18f0df2afd7242bcf7 *vp90-2-03-size-226x226.webm.md5
52bc1dfd3a97b24d922eb8a31d07527891561f2a *vp90-2-03-size-352x288.webm
3084d6d0a1eec22e85a394422fbc8faae58930a5 *vp90-2-03-size-352x288.webm.md5
b6524e4084d15b5d0caaa3d3d1368db30cbee69c *vp90-2-03-deltaq.webm b6524e4084d15b5d0caaa3d3d1368db30cbee69c *vp90-2-03-deltaq.webm
65f45ec9a55537aac76104818278e0978f94a678 *vp90-2-03-deltaq.webm.md5 65f45ec9a55537aac76104818278e0978f94a678 *vp90-2-03-deltaq.webm.md5
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba *vp90-2-05-resize.ivf 4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba *vp90-2-05-resize.ivf

View File

@@ -20,6 +20,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc

View File

@@ -180,7 +180,8 @@ const char *const kVP9TestVectors[] = {
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm", "vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm", "vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm", "vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
"vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm", "vp90-2-03-size-226x226.webm", "vp90-2-03-size-352x288.webm",
"vp90-2-03-deltaq.webm",
"vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm", "vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
"vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm",
"vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm", "vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm",

View File

@@ -1629,7 +1629,6 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
int mid_boost_bits = 0; int mid_boost_bits = 0;
int mid_frame_idx; int mid_frame_idx;
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS]; unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
int alt_frame_index = frame_index;
key_frame = cpi->common.frame_type == KEY_FRAME; key_frame = cpi->common.frame_type == KEY_FRAME;
@@ -1642,15 +1641,13 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
gf_group->update_type[0] = OVERLAY_UPDATE; gf_group->update_type[0] = OVERLAY_UPDATE;
gf_group->rf_level[0] = INTER_NORMAL; gf_group->rf_level[0] = INTER_NORMAL;
gf_group->bit_allocation[0] = 0; gf_group->bit_allocation[0] = 0;
gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
} else { } else {
gf_group->update_type[0] = GF_UPDATE; gf_group->update_type[0] = GF_UPDATE;
gf_group->rf_level[0] = GF_ARF_STD; gf_group->rf_level[0] = GF_ARF_STD;
gf_group->bit_allocation[0] = gf_arf_bits; gf_group->bit_allocation[0] = gf_arf_bits;
}
gf_group->arf_update_idx[0] = arf_buffer_indices[0]; gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
}
// Step over the golden frame / overlay frame // Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats)) if (EOF == input_stats(twopass, &frame_stats))
@@ -1664,15 +1661,15 @@ static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
// Store the bits to spend on the ARF if there is one. // Store the bits to spend on the ARF if there is one.
if (rc->source_alt_ref_pending) { if (rc->source_alt_ref_pending) {
gf_group->update_type[alt_frame_index] = ARF_UPDATE; gf_group->update_type[frame_index] = ARF_UPDATE;
gf_group->rf_level[alt_frame_index] = GF_ARF_STD; gf_group->rf_level[frame_index] = GF_ARF_STD;
gf_group->bit_allocation[alt_frame_index] = gf_arf_bits; gf_group->bit_allocation[frame_index] = gf_arf_bits;
gf_group->arf_src_offset[alt_frame_index] = gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1); (unsigned char)(rc->baseline_gf_interval - 1);
gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0]; gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
gf_group->arf_ref_idx[alt_frame_index] = gf_group->arf_ref_idx[frame_index] =
arf_buffer_indices[cpi->multi_arf_last_grp_enabled && arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
rc->source_alt_ref_active]; rc->source_alt_ref_active];
++frame_index; ++frame_index;

View File

@@ -207,7 +207,7 @@ struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
int drain) { int drain) {
struct lookahead_entry *buf = NULL; struct lookahead_entry *buf = NULL;
if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
buf = pop(ctx, &ctx->read_idx); buf = pop(ctx, &ctx->read_idx);
ctx->sz--; ctx->sz--;
} }

View File

@@ -142,8 +142,10 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
vpx_prob *bestp, vpx_prob *bestp,
vpx_prob upd, vpx_prob upd,
int stepsize) { int stepsize) {
int i, old_b, new_b, update_b, savings, bestsavings, step; int i, old_b, new_b, update_b, savings, bestsavings;
int newp; int newp;
const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;
const int step = stepsize * step_sign;
vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
vp10_model_to_full_probs(oldp, oldplist); vp10_model_to_full_probs(oldp, oldplist);
memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES); memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
@@ -154,9 +156,10 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
bestsavings = 0; bestsavings = 0;
bestnewp = oldp[PIVOT_NODE]; bestnewp = oldp[PIVOT_NODE];
if (*bestp > oldp[PIVOT_NODE]) { assert(stepsize > 0);
step = -stepsize;
for (newp = *bestp; newp > oldp[PIVOT_NODE]; newp += step) { for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0;
newp += step) {
if (newp < 1 || newp > 255) if (newp < 1 || newp > 255)
continue; continue;
newplist[PIVOT_NODE] = newp; newplist[PIVOT_NODE] = newp;
@@ -172,25 +175,6 @@ int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
bestnewp = newp; bestnewp = newp;
} }
} }
} else {
step = stepsize;
for (newp = *bestp; newp < oldp[PIVOT_NODE]; newp += step) {
if (newp < 1 || newp > 255)
continue;
newplist[PIVOT_NODE] = newp;
vp10_model_to_full_probs(newplist, newplist);
for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
new_b += cost_branch256(ct + 2 * i, newplist[i]);
new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) +
vp10_cost_upd256;
savings = old_b - new_b - update_b;
if (savings > bestsavings) {
bestsavings = savings;
bestnewp = newp;
}
}
}
*bestp = bestnewp; *bestp = bestnewp;
return bestsavings; return bestsavings;

View File

@@ -17,7 +17,7 @@
extern "C" { extern "C" {
#endif #endif
static void intra_prediction_down_copy(MACROBLOCKD *xd, static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd,
unsigned char *above_right_src) unsigned char *above_right_src)
{ {
int dst_stride = xd->dst.y_stride; int dst_stride = xd->dst.y_stride;

View File

@@ -181,6 +181,7 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx,
{ {
struct lookahead_entry* buf = NULL; struct lookahead_entry* buf = NULL;
assert(ctx != NULL);
if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1)) if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1))
{ {
buf = pop(ctx, &ctx->read_idx); buf = pop(ctx, &ctx->read_idx);

View File

@@ -36,6 +36,8 @@
extern unsigned int cnt_pm; extern unsigned int cnt_pm;
#endif #endif
#define MODEL_MODE 0
extern const int vp8_ref_frame_order[MAX_MODES]; extern const int vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
@@ -45,18 +47,21 @@ extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
// skin color classifier is defined. // skin color classifier is defined.
// Fixed-point skin color model parameters. // Fixed-point skin color model parameters.
static const int skin_mean[2] = {7463, 9614}; // q6 static const int skin_mean[5][2] =
{{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}};
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16 static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
static const int skin_threshold = 1570636; // q18 static const int skin_threshold[2] = {1570636, 800000}; // q18
// Evaluates the Mahalanobis distance measure for the input CbCr values. // Evaluates the Mahalanobis distance measure for the input CbCr values.
static int evaluate_skin_color_difference(int cb, int cr) static int evaluate_skin_color_difference(int cb, int cr, int idx) {
{
const int cb_q6 = cb << 6; const int cb_q6 = cb << 6;
const int cr_q6 = cr << 6; const int cr_q6 = cr << 6;
const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]); const int cb_diff_q12 =
const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]); (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]);
const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]); const int cbcr_diff_q12 =
(cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]);
const int cr_diff_q12 =
(cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]);
const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10; const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10; const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10; const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
@@ -67,6 +72,34 @@ static int evaluate_skin_color_difference(int cb, int cr)
return skin_diff; return skin_diff;
} }
// Returns 1 when the (y, cb, cr) sample is classified as skin color and 0
// otherwise. A sample whose luma lies outside [40, 220] is never skin.
// With MODEL_MODE == 0 only model 0 is evaluated, against skin_threshold[0];
// otherwise the sample counts as skin as soon as any of the 5 models yields
// a difference below skin_threshold[1].
static int is_skin_color(int y, int cb, int cr)
{
    int model;

    if (y < 40 || y > 220)
    {
        return 0;
    }

    if (MODEL_MODE == 0)
    {
        return (evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]);
    }

    for (model = 0; model < 5; model++)
    {
        if (evaluate_skin_color_difference(cb, cr, model) < skin_threshold[1])
        {
            return 1;
        }
    }
    return 0;
}
static int macroblock_corner_grad(unsigned char* signal, int stride, static int macroblock_corner_grad(unsigned char* signal, int stride,
int offsetx, int offsety, int sgnx, int sgny) int offsetx, int offsety, int sgnx, int sgny)
{ {
@@ -157,16 +190,6 @@ static int check_dot_artifact_candidate(VP8_COMP *cpi,
return 0; return 0;
} }
// Checks if the input yCbCr values corresponds to skin color.
static int is_skin_color(int y, int cb, int cr)
{
if (y < 40 || y > 220)
{
return 0;
}
return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
}
int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv, int_mv *bestmv, int_mv *ref_mv,
int error_per_bit, int error_per_bit,

View File

@@ -1162,31 +1162,6 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
return NULL; return NULL;
} }
static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx,
va_list args)
{
int update = va_arg(args, int);
vp8_update_entropy(ctx->cpi, update);
return VPX_CODEC_OK;
}
static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx,
va_list args)
{
int update = va_arg(args, int);
vp8_update_reference(ctx->cpi, update);
return VPX_CODEC_OK;
}
static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx,
va_list args)
{
int reference_flag = va_arg(args, int);
vp8_use_as_reference(ctx->cpi, reference_flag);
return VPX_CODEC_OK;
}
static vpx_codec_err_t vp8e_set_frame_flags(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t vp8e_set_frame_flags(vpx_codec_alg_priv_t *ctx,
va_list args) va_list args)
{ {

View File

@@ -137,6 +137,8 @@ struct macroblock {
// the visual quality at the boundary of moving color objects. // the visual quality at the boundary of moving color objects.
uint8_t color_sensitivity[2]; uint8_t color_sensitivity[2];
uint8_t sb_is_skin;
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride); void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob); void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH

View File

@@ -344,7 +344,9 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
mv_col = ctx->best_sse_mv.as_mv.col; mv_col = ctx->best_sse_mv.as_mv.col;
mv_row = ctx->best_sse_mv.as_mv.row; mv_row = ctx->best_sse_mv.as_mv.row;
motion_magnitude = mv_row * mv_row + mv_col * mv_col; motion_magnitude = mv_row * mv_row + mv_col * mv_col;
if (denoiser->denoising_level == kDenHigh && motion_magnitude < 16) { if (!is_skin &&
denoiser->denoising_level == kDenHigh &&
motion_magnitude < 16) {
denoiser->increase_denoising = 1; denoiser->increase_denoising = 1;
} else { } else {
denoiser->increase_denoising = 0; denoiser->increase_denoising = 0;

View File

@@ -714,6 +714,10 @@ static int choose_partitioning(VP9_COMP *cpi,
s = x->plane[0].src.buf; s = x->plane[0].src.buf;
sp = x->plane[0].src.stride; sp = x->plane[0].src.stride;
// Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
// 5-20 for the 16x16 blocks.
force_split[0] = 0;
if (!is_key_frame) { if (!is_key_frame) {
// In the case of spatial/temporal scalable coding, the assumption here is // In the case of spatial/temporal scalable coding, the assumption here is
// that the temporal reference frame will always be of type LAST_FRAME. // that the temporal reference frame will always be of type LAST_FRAME.
@@ -768,6 +772,49 @@ static int choose_partitioning(VP9_COMP *cpi,
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
// Check if most of the superblock is skin content, and if so, force split
// to 32x32. Avoid checking superblocks on/near the boundary for high
// resolutions. Note that the superblock may still pick 64X64 if y_sad is
// very small (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave
// this as is.
x->sb_is_skin = 0;
#if !CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && (low_res || (mi_col >= 8 &&
mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows))) {
int num_16x16_skin = 0;
int num_16x16_nonskin = 0;
uint8_t *ysignal = x->plane[0].src.buf;
uint8_t *usignal = x->plane[1].src.buf;
uint8_t *vsignal = x->plane[2].src.buf;
int spuv = x->plane[1].src.stride;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
int is_skin = vp9_compute_skin_block(ysignal,
usignal,
vsignal,
sp,
spuv,
BLOCK_16X16);
num_16x16_skin += is_skin;
num_16x16_nonskin += (1 - is_skin);
if (num_16x16_nonskin > 3) {
// Exit loop if at least 4 of the 16x16 blocks are not skin.
i = 4;
j = 4;
}
ysignal += 16;
usignal += 8;
vsignal += 8;
}
ysignal += (sp << 4) - 64;
usignal += (spuv << 3) - 32;
vsignal += (spuv << 3) - 32;
}
if (num_16x16_skin > 12) {
x->sb_is_skin = 1;
force_split[0] = 1;
}
}
#endif
for (i = 1; i <= 2; ++i) { for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i]; struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i]; struct macroblockd_plane *pd = &xd->plane[i];
@@ -779,6 +826,8 @@ static int choose_partitioning(VP9_COMP *cpi,
uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride); pd->dst.buf, pd->dst.stride);
// TODO(marpan): Investigate if we should lower this threshold if
// superblock is detected as skin.
x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
} }
@@ -818,9 +867,6 @@ static int choose_partitioning(VP9_COMP *cpi,
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
} }
// Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
// 5-20 for the 16x16 blocks.
force_split[0] = 0;
// Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
// for splits. // for splits.
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
@@ -3629,6 +3675,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
vp9_rd_cost_init(&dummy_rdc); vp9_rd_cost_init(&dummy_rdc);
x->color_sensitivity[0] = 0; x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0; x->color_sensitivity[1] = 0;
x->sb_is_skin = 0;
if (seg->enabled) { if (seg->enabled) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map const uint8_t *const map = seg->update_map ? cpi->segmentation_map

View File

@@ -1720,15 +1720,13 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
gf_group->update_type[0] = OVERLAY_UPDATE; gf_group->update_type[0] = OVERLAY_UPDATE;
gf_group->rf_level[0] = INTER_NORMAL; gf_group->rf_level[0] = INTER_NORMAL;
gf_group->bit_allocation[0] = 0; gf_group->bit_allocation[0] = 0;
gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
} else { } else {
gf_group->update_type[0] = GF_UPDATE; gf_group->update_type[0] = GF_UPDATE;
gf_group->rf_level[0] = GF_ARF_STD; gf_group->rf_level[0] = GF_ARF_STD;
gf_group->bit_allocation[0] = gf_arf_bits; gf_group->bit_allocation[0] = gf_arf_bits;
}
gf_group->arf_update_idx[0] = arf_buffer_indices[0]; gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
}
// Step over the golden frame / overlay frame // Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats)) if (EOF == input_stats(twopass, &frame_stats))

View File

@@ -207,7 +207,7 @@ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx,
int drain) { int drain) {
struct lookahead_entry *buf = NULL; struct lookahead_entry *buf = NULL;
if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
buf = pop(ctx, &ctx->read_idx); buf = pop(ctx, &ctx->read_idx);
ctx->sz--; ctx->sz--;
} }

View File

@@ -66,6 +66,7 @@ int enable_noise_estimation(VP9_COMP *const cpi) {
return 0; return 0;
} }
#if CONFIG_VP9_TEMPORAL_DENOISING
static void copy_frame(YV12_BUFFER_CONFIG * const dest, static void copy_frame(YV12_BUFFER_CONFIG * const dest,
const YV12_BUFFER_CONFIG * const src) { const YV12_BUFFER_CONFIG * const src) {
int r; int r;
@@ -81,6 +82,7 @@ static void copy_frame(YV12_BUFFER_CONFIG * const dest,
srcbuf += src->y_stride; srcbuf += src->y_stride;
} }
} }
#endif // CONFIG_VP9_TEMPORAL_DENOISING
NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) {
int noise_level = kLowLow; int noise_level = kLowLow;

View File

@@ -852,6 +852,12 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
if (var <= thresh_ac && (sse - var) <= thresh_dc) { if (var <= thresh_ac && (sse - var) <= thresh_dc) {
unsigned int sse_u, sse_v; unsigned int sse_u, sse_v;
unsigned int var_u, var_v; unsigned int var_u, var_v;
unsigned int thresh_ac_uv = thresh_ac;
unsigned int thresh_dc_uv = thresh_dc;
if (x->sb_is_skin) {
thresh_ac_uv = 0;
thresh_dc_uv = 0;
}
// Skip UV prediction unless breakout is zero (lossless) to save // Skip UV prediction unless breakout is zero (lossless) to save
// computation with low impact on the result // computation with low impact on the result
@@ -867,14 +873,14 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[1].dst.stride, &sse_u); xd->plane[1].dst.stride, &sse_u);
// U skipping condition checking // U skipping condition checking
if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) { if (((var_u << 2) <= thresh_ac_uv) && (sse_u - var_u <= thresh_dc_uv)) {
var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
x->plane[2].src.stride, x->plane[2].src.stride,
xd->plane[2].dst.buf, xd->plane[2].dst.buf,
xd->plane[2].dst.stride, &sse_v); xd->plane[2].dst.stride, &sse_v);
// V skipping condition checking // V skipping condition checking
if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) { if (((var_v << 2) <= thresh_ac_uv) && (sse_v - var_v <= thresh_dc_uv)) {
x->skip = 1; x->skip = 1;
// The cost of skip bit needs to be added. // The cost of skip bit needs to be added.
@@ -1585,7 +1591,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
if (cpi->oxcf.speed >= 5 && if (cpi->oxcf.speed >= 5 &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN) { cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
!x->sb_is_skin) {
// Bias against non-zero (above some threshold) motion for large blocks. // Bias against non-zero (above some threshold) motion for large blocks.
// This is temporary fix to avoid selection of large mv for big blocks. // This is temporary fix to avoid selection of large mv for big blocks.
if (frame_mv[this_mode][ref_frame].as_mv.row > 64 || if (frame_mv[this_mode][ref_frame].as_mv.row > 64 ||

View File

@@ -15,22 +15,28 @@
#include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_skin_detection.h" #include "vp9/encoder/vp9_skin_detection.h"
#define MODEL_MODE 0
// Fixed-point skin color model parameters. // Fixed-point skin color model parameters.
static const int skin_mean[2] = {7463, 9614}; // q6 static const int skin_mean[5][2] = {
{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}};
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16 static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
static const int skin_threshold = 1570636; // q18 static const int skin_threshold[2] = {1570636, 800000}; // q18
// Thresholds on luminance. // Thresholds on luminance.
static const int y_low = 20; static const int y_low = 20;
static const int y_high = 220; static const int y_high = 220;
// Evaluates the Mahalanobis distance measure for the input CbCr values. // Evaluates the Mahalanobis distance measure for the input CbCr values.
static int evaluate_skin_color_difference(int cb, int cr) { static int evaluate_skin_color_difference(int cb, int cr, int idx) {
const int cb_q6 = cb << 6; const int cb_q6 = cb << 6;
const int cr_q6 = cr << 6; const int cr_q6 = cr << 6;
const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]); const int cb_diff_q12 =
const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]); (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]);
const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]); const int cbcr_diff_q12 =
(cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]);
const int cr_diff_q12 =
(cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]);
const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10; const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10; const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10; const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
@@ -42,10 +48,21 @@ static int evaluate_skin_color_difference(int cb, int cr) {
} }
int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) { int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) {
if (y < y_low || y > y_high) if (y < y_low || y > y_high) {
return 0; return 0;
else } else {
return (evaluate_skin_color_difference(cb, cr) < skin_threshold); if (MODEL_MODE == 0) {
return (evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]);
} else {
int i = 0;
for (; i < 5; i++) {
if (evaluate_skin_color_difference(cb, cr, i) < skin_threshold[1]) {
return 1;
}
}
return 0;
}
}
} }
int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,

View File

@@ -142,8 +142,10 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
vpx_prob *bestp, vpx_prob *bestp,
vpx_prob upd, vpx_prob upd,
int stepsize) { int stepsize) {
int i, old_b, new_b, update_b, savings, bestsavings, step; int i, old_b, new_b, update_b, savings, bestsavings;
int newp; int newp;
const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;
const int step = stepsize * step_sign;
vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
vp9_model_to_full_probs(oldp, oldplist); vp9_model_to_full_probs(oldp, oldplist);
memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES); memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
@@ -154,9 +156,10 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
bestsavings = 0; bestsavings = 0;
bestnewp = oldp[PIVOT_NODE]; bestnewp = oldp[PIVOT_NODE];
if (*bestp > oldp[PIVOT_NODE]) { assert(stepsize > 0);
step = -stepsize;
for (newp = *bestp; newp > oldp[PIVOT_NODE]; newp += step) { for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0;
newp += step) {
if (newp < 1 || newp > 255) if (newp < 1 || newp > 255)
continue; continue;
newplist[PIVOT_NODE] = newp; newplist[PIVOT_NODE] = newp;
@@ -172,25 +175,6 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
bestnewp = newp; bestnewp = newp;
} }
} }
} else {
step = stepsize;
for (newp = *bestp; newp < oldp[PIVOT_NODE]; newp += step) {
if (newp < 1 || newp > 255)
continue;
newplist[PIVOT_NODE] = newp;
vp9_model_to_full_probs(newplist, newplist);
for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
new_b += cost_branch256(ct + 2 * i, newplist[i]);
new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) +
vp9_cost_upd256;
savings = old_b - new_b - update_b;
if (savings > bestsavings) {
bestsavings = savings;
bestnewp = newp;
}
}
}
*bestp = bestnewp; *bestp = bestnewp;
return bestsavings; return bestsavings;

View File

@@ -44,6 +44,7 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
} }
#if CONFIG_MISC_FIXES
static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {
int r, c; int r, c;
@@ -58,6 +59,7 @@ static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride; dst += stride;
} }
} }
#endif // CONFIG_MISC_FIXES
static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {
@@ -76,6 +78,7 @@ static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
} }
} }
#if CONFIG_MISC_FIXES
static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {
int r, c; int r, c;
@@ -89,6 +92,7 @@ static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride; dst += stride;
} }
} }
#endif // CONFIG_MISC_FIXES
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {
@@ -109,6 +113,7 @@ static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
} }
} }
#if CONFIG_MISC_FIXES
static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {
int r, c; int r, c;
@@ -121,6 +126,7 @@ static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride; dst += stride;
} }
} }
#endif // CONFIG_MISC_FIXES
static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) { const uint8_t *above, const uint8_t *left) {

View File

@@ -785,10 +785,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct4x4_1_add sse2/; specialize qw/vpx_idct4x4_1_add sse2/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct8x8_64_add sse2/; specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct8x8_12_add sse2/; specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct8x8_1_add sse2/; specialize qw/vpx_idct8x8_1_add sse2/;
@@ -803,14 +803,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct16x16_1_add sse2/; specialize qw/vpx_idct16x16_1_add sse2/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1024_add sse2/; specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_135_add sse2/; specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc";
# TODO: add a dedicated 135-eob idct32x32 SSE2 implementation; until then the SSE2 variant aliases the 1024 version (next line).
$vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_34_add sse2/; specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride"; add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1_add sse2/; specialize qw/vpx_idct32x32_1_add sse2/;

View File

@@ -220,7 +220,24 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova m12, [pw_11585x2] mova m12, [pw_11585x2]
lea r3, [2 * strideq] lea r3, [2 * strideq]
%if CONFIG_VP9_HIGHBITDEPTH
mova m0, [inputq + 0]
packssdw m0, [inputq + 16]
mova m1, [inputq + 32]
packssdw m1, [inputq + 48]
mova m2, [inputq + 64]
packssdw m2, [inputq + 80]
mova m3, [inputq + 96]
packssdw m3, [inputq + 112]
mova m4, [inputq + 128]
packssdw m4, [inputq + 144]
mova m5, [inputq + 160]
packssdw m5, [inputq + 176]
mova m6, [inputq + 192]
packssdw m6, [inputq + 208]
mova m7, [inputq + 224]
packssdw m7, [inputq + 240]
%else
mova m0, [inputq + 0] mova m0, [inputq + 0]
mova m1, [inputq + 16] mova m1, [inputq + 16]
mova m2, [inputq + 32] mova m2, [inputq + 32]
@@ -229,7 +246,7 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova m5, [inputq + 80] mova m5, [inputq + 80]
mova m6, [inputq + 96] mova m6, [inputq + 96]
mova m7, [inputq + 112] mova m7, [inputq + 112]
%endif
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
IDCT8_1D IDCT8_1D
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
@@ -254,10 +271,21 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
lea r3, [2 * strideq] lea r3, [2 * strideq]
%if CONFIG_VP9_HIGHBITDEPTH
mova m0, [inputq + 0]
packssdw m0, [inputq + 16]
mova m1, [inputq + 32]
packssdw m1, [inputq + 48]
mova m2, [inputq + 64]
packssdw m2, [inputq + 80]
mova m3, [inputq + 96]
packssdw m3, [inputq + 112]
%else
mova m0, [inputq + 0] mova m0, [inputq + 0]
mova m1, [inputq + 16] mova m1, [inputq + 16]
mova m2, [inputq + 32] mova m2, [inputq + 32]
mova m3, [inputq + 48] mova m3, [inputq + 48]
%endif
punpcklwd m0, m1 punpcklwd m0, m1
punpcklwd m2, m3 punpcklwd m2, m3
@@ -765,6 +793,24 @@ idct32x32_34:
lea r4, [rsp + transposed_in] lea r4, [rsp + transposed_in]
idct32x32_34_transpose: idct32x32_34_transpose:
%if CONFIG_VP9_HIGHBITDEPTH
mova m0, [r3 + 0]
packssdw m0, [r3 + 16]
mova m1, [r3 + 32 * 4]
packssdw m1, [r3 + 32 * 4 + 16]
mova m2, [r3 + 32 * 8]
packssdw m2, [r3 + 32 * 8 + 16]
mova m3, [r3 + 32 * 12]
packssdw m3, [r3 + 32 * 12 + 16]
mova m4, [r3 + 32 * 16]
packssdw m4, [r3 + 32 * 16 + 16]
mova m5, [r3 + 32 * 20]
packssdw m5, [r3 + 32 * 20 + 16]
mova m6, [r3 + 32 * 24]
packssdw m6, [r3 + 32 * 24 + 16]
mova m7, [r3 + 32 * 28]
packssdw m7, [r3 + 32 * 28 + 16]
%else
mova m0, [r3 + 0] mova m0, [r3 + 0]
mova m1, [r3 + 16 * 4] mova m1, [r3 + 16 * 4]
mova m2, [r3 + 16 * 8] mova m2, [r3 + 16 * 8]
@@ -773,6 +819,7 @@ idct32x32_34_transpose:
mova m5, [r3 + 16 * 20] mova m5, [r3 + 16 * 20]
mova m6, [r3 + 16 * 24] mova m6, [r3 + 16 * 24]
mova m7, [r3 + 16 * 28] mova m7, [r3 + 16 * 28]
%endif
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
@@ -1176,6 +1223,24 @@ idct32x32_135:
mov r7, 2 mov r7, 2
idct32x32_135_transpose: idct32x32_135_transpose:
%if CONFIG_VP9_HIGHBITDEPTH
mova m0, [r3 + 0]
packssdw m0, [r3 + 16]
mova m1, [r3 + 32 * 4]
packssdw m1, [r3 + 32 * 4 + 16]
mova m2, [r3 + 32 * 8]
packssdw m2, [r3 + 32 * 8 + 16]
mova m3, [r3 + 32 * 12]
packssdw m3, [r3 + 32 * 12 + 16]
mova m4, [r3 + 32 * 16]
packssdw m4, [r3 + 32 * 16 + 16]
mova m5, [r3 + 32 * 20]
packssdw m5, [r3 + 32 * 20 + 16]
mova m6, [r3 + 32 * 24]
packssdw m6, [r3 + 32 * 24 + 16]
mova m7, [r3 + 32 * 28]
packssdw m7, [r3 + 32 * 28 + 16]
%else
mova m0, [r3 + 0] mova m0, [r3 + 0]
mova m1, [r3 + 16 * 4] mova m1, [r3 + 16 * 4]
mova m2, [r3 + 16 * 8] mova m2, [r3 + 16 * 8]
@@ -1184,7 +1249,7 @@ idct32x32_135_transpose:
mova m5, [r3 + 16 * 20] mova m5, [r3 + 16 * 20]
mova m6, [r3 + 16 * 24] mova m6, [r3 + 16 * 24]
mova m7, [r3 + 16 * 28] mova m7, [r3 + 16 * 28]
%endif
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
mova [r4 + 0], m0 mova [r4 + 0], m0
@@ -1196,14 +1261,22 @@ idct32x32_135_transpose:
mova [r4 + 16 * 6], m6 mova [r4 + 16 * 6], m6
mova [r4 + 16 * 7], m7 mova [r4 + 16 * 7], m7
%if CONFIG_VP9_HIGHBITDEPTH
add r3, 32
%else
add r3, 16 add r3, 16
%endif
add r4, 16 * 8 add r4, 16 * 8
dec r7 dec r7
jne idct32x32_135_transpose jne idct32x32_135_transpose
IDCT32X32_135 16*0, 16*32, 16*64, 16*96 IDCT32X32_135 16*0, 16*32, 16*64, 16*96
lea stp, [stp + 16 * 8] lea stp, [stp + 16 * 8]
%if CONFIG_VP9_HIGHBITDEPTH
lea inputq, [inputq + 32 * 32]
%else
lea inputq, [inputq + 16 * 32] lea inputq, [inputq + 16 * 32]
%endif
dec r6 dec r6
jnz idct32x32_135 jnz idct32x32_135
@@ -1614,6 +1687,24 @@ idct32x32_1024:
mov r7, 4 mov r7, 4
idct32x32_1024_transpose: idct32x32_1024_transpose:
%if CONFIG_VP9_HIGHBITDEPTH
mova m0, [r3 + 0]
packssdw m0, [r3 + 16]
mova m1, [r3 + 32 * 4]
packssdw m1, [r3 + 32 * 4 + 16]
mova m2, [r3 + 32 * 8]
packssdw m2, [r3 + 32 * 8 + 16]
mova m3, [r3 + 32 * 12]
packssdw m3, [r3 + 32 * 12 + 16]
mova m4, [r3 + 32 * 16]
packssdw m4, [r3 + 32 * 16 + 16]
mova m5, [r3 + 32 * 20]
packssdw m5, [r3 + 32 * 20 + 16]
mova m6, [r3 + 32 * 24]
packssdw m6, [r3 + 32 * 24 + 16]
mova m7, [r3 + 32 * 28]
packssdw m7, [r3 + 32 * 28 + 16]
%else
mova m0, [r3 + 0] mova m0, [r3 + 0]
mova m1, [r3 + 16 * 4] mova m1, [r3 + 16 * 4]
mova m2, [r3 + 16 * 8] mova m2, [r3 + 16 * 8]
@@ -1622,6 +1713,7 @@ idct32x32_1024_transpose:
mova m5, [r3 + 16 * 20] mova m5, [r3 + 16 * 20]
mova m6, [r3 + 16 * 24] mova m6, [r3 + 16 * 24]
mova m7, [r3 + 16 * 28] mova m7, [r3 + 16 * 28]
%endif
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
@@ -1633,8 +1725,11 @@ idct32x32_1024_transpose:
mova [r4 + 16 * 5], m5 mova [r4 + 16 * 5], m5
mova [r4 + 16 * 6], m6 mova [r4 + 16 * 6], m6
mova [r4 + 16 * 7], m7 mova [r4 + 16 * 7], m7
%if CONFIG_VP9_HIGHBITDEPTH
add r3, 32
%else
add r3, 16 add r3, 16
%endif
add r4, 16 * 8 add r4, 16 * 8
dec r7 dec r7
jne idct32x32_1024_transpose jne idct32x32_1024_transpose
@@ -1642,7 +1737,11 @@ idct32x32_1024_transpose:
IDCT32X32_1024 16*0, 16*32, 16*64, 16*96 IDCT32X32_1024 16*0, 16*32, 16*64, 16*96
lea stp, [stp + 16 * 8] lea stp, [stp + 16 * 8]
%if CONFIG_VP9_HIGHBITDEPTH
lea inputq, [inputq + 32 * 32]
%else
lea inputq, [inputq + 16 * 32] lea inputq, [inputq + 16 * 32]
%endif
dec r6 dec r6
jnz idct32x32_1024 jnz idct32x32_1024