Merge changes from topic 'missing-proto'

* changes:
  vp9_subexp.h: add a missing prototype
  vp9: add some missing includes
  vp9 intrinsics: add vp9_rtcd include
  vp9: correct some function signatures
  vp9_variance_sse2: sync function signatures
  vp9/encoder: make some functions static
  vp9_dct_sse2: make some functions static
  vp9_decodeframe.c: make a function static
This commit is contained in:
James Zern 2015-05-15 23:08:14 +00:00 committed by Gerrit Code Review
commit 985f19bc6b
40 changed files with 103 additions and 69 deletions

View File

@ -11,6 +11,7 @@
#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"

View File

@ -944,6 +944,6 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
have_top, have_left, have_right, x, y, plane);
}
void vp9_init_intra_predictors() {
void vp9_init_intra_predictors(void) {
once(vp9_init_intra_predictors_internal);
}

View File

@ -18,7 +18,7 @@
extern "C" {
#endif
void vp9_init_intra_predictors();
void vp9_init_intra_predictors(void);
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,

View File

@ -969,7 +969,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_highbd_avg_8x8/;
add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_highbd_avg_4x4/;
add_proto qw/unsigned int vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_highbd_minmax_8x8/;
}

View File

@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
#include "vp9/common/vp9_idct.h"

View File

@ -9,6 +9,8 @@
*/
#include <immintrin.h> /* AVX2 */
#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,

View File

@ -9,6 +9,8 @@
*/
#include <emmintrin.h> // SSE2
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_ports/emmintrin_compat.h"

View File

@ -9,6 +9,8 @@
*/
#include <tmmintrin.h>
#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/emmintrin_compat.h"

View File

@ -1867,14 +1867,15 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
int plane, int bw, int bh, int x,
int y, int w, int h, int mi_x, int mi_y,
const InterpKernel *kernel,
const struct scale_factors *sf,
struct buf_2d *pre_buf, struct buf_2d *dst_buf,
const MV* mv, RefCntBuffer *ref_frame_buf,
int is_scaled, int ref) {
static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
int plane, int bw, int bh, int x,
int y, int w, int h, int mi_x, int mi_y,
const InterpKernel *kernel,
const struct scale_factors *sf,
struct buf_2d *pre_buf,
struct buf_2d *dst_buf, const MV* mv,
RefCntBuffer *ref_frame_buf,
int is_scaled, int ref) {
struct macroblockd_plane *const pd = &xd->plane[plane];
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
MV32 scaled_mv;

View File

@ -11,6 +11,7 @@
#include <limits.h>
#include <math.h>
#include "vp9/encoder/vp9_aq_complexity.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/common/vp9_seg_common.h"

View File

@ -16,6 +16,8 @@
extern "C" {
#endif
#include "vp9/common/vp9_enums.h"
struct VP9_COMP;
struct macroblock;

View File

@ -357,7 +357,7 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock.
// Blocks labeled as BOOST1 may later get set to BOOST2 (during the
// encoding of the superblock).
void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) {
static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
unsigned char *const seg_map = cpi->segmentation_map;
@ -510,7 +510,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
// Update the segmentation and refresh map.
vp9_cyclic_refresh_update_map(cpi);
cyclic_refresh_update_map(cpi);
}
}

View File

@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx_ports/mem.h"

View File

@ -361,7 +361,7 @@ static void get_variance(var *v) {
((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count);
}
void sum_2_variances(const var *a, const var *b, var *r) {
static void sum_2_variances(const var *a, const var *b, var *r) {
assert(a->log2_count == b->log2_count);
fill_variance(a->sum_square_error + b->sum_square_error,
a->sum_error + b->sum_error, a->log2_count + 1, r);

View File

@ -22,7 +22,7 @@ static struct vp9_token mv_class_encodings[MV_CLASSES];
static struct vp9_token mv_fp_encodings[MV_FP_SIZE];
static struct vp9_token mv_class0_encodings[CLASS0_SIZE];
void vp9_entropy_mv_init() {
void vp9_entropy_mv_init(void) {
vp9_tokens_from_tree(mv_joint_encodings, vp9_mv_joint_tree);
vp9_tokens_from_tree(mv_class_encodings, vp9_mv_class_tree);
vp9_tokens_from_tree(mv_class0_encodings, vp9_mv_class0_tree);

View File

@ -18,7 +18,7 @@
extern "C" {
#endif
void vp9_entropy_mv_init();
void vp9_entropy_mv_init(void);
void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w,
nmv_context_counts *const counts);

View File

@ -112,7 +112,7 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
// Mark all inactive blocks as active. Other segmentation features may be set
// so memset cannot be used; instead, only inactive blocks should be reset.
void vp9_suppress_active_map(VP9_COMP *cpi) {
static void suppress_active_map(VP9_COMP *cpi) {
unsigned char *const seg_map = cpi->segmentation_map;
int i;
if (cpi->active_map.enabled || cpi->active_map.update)
@ -121,7 +121,7 @@ void vp9_suppress_active_map(VP9_COMP *cpi) {
seg_map[i] = AM_SEGMENT_ID_ACTIVE;
}
void vp9_apply_active_map(VP9_COMP *cpi) {
static void apply_active_map(VP9_COMP *cpi) {
struct segmentation *const seg = &cpi->common.seg;
unsigned char *const seg_map = cpi->segmentation_map;
const unsigned char *const active_map = cpi->active_map.map;
@ -2936,7 +2936,7 @@ static void init_motion_estimation(VP9_COMP *cpi) {
}
}
void set_frame_size(VP9_COMP *cpi) {
static void set_frame_size(VP9_COMP *cpi) {
int ref_frame;
VP9_COMMON *const cm = &cpi->common;
VP9EncoderConfig *const oxcf = &cpi->oxcf;
@ -3033,7 +3033,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) {
setup_frame(cpi);
vp9_suppress_active_map(cpi);
suppress_active_map(cpi);
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@ -3043,7 +3043,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) {
} else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
vp9_cyclic_refresh_setup(cpi);
}
vp9_apply_active_map(cpi);
apply_active_map(cpi);
// transform / motion compensation build reconstruction frame
vp9_encode_frame(cpi);
@ -3397,7 +3397,7 @@ static void set_arf_sign_bias(VP9_COMP *cpi) {
cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
}
int setup_interp_filter_search_mask(VP9_COMP *cpi) {
static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
INTERP_FILTER ifilter;
int ref_total[MAX_REF_FRAMES] = {0};
MV_REFERENCE_FRAME ref;
@ -3791,8 +3791,8 @@ static int frame_is_reference(const VP9_COMP *cpi) {
cm->seg.update_data;
}
void adjust_frame_rate(VP9_COMP *cpi,
const struct lookahead_entry *source) {
static void adjust_frame_rate(VP9_COMP *cpi,
const struct lookahead_entry *source) {
int64_t this_duration;
int step = 0;
@ -3877,7 +3877,8 @@ extern double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
int width, int height);
#endif
void adjust_image_stat(double y, double u, double v, double all, ImageStat *s) {
static void adjust_image_stat(double y, double u, double v, double all,
ImageStat *s) {
s->stat[Y] += y;
s->stat[U] += u;
s->stat[V] += v;

View File

@ -2414,7 +2414,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Define the reference buffers that will be updated post encode.
void configure_buffer_updates(VP9_COMP *cpi) {
static void configure_buffer_updates(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
cpi->rc.is_src_frame_alt_ref = 0;
@ -2461,7 +2461,7 @@ void configure_buffer_updates(VP9_COMP *cpi) {
}
}
int is_skippable_frame(const VP9_COMP *cpi) {
static int is_skippable_frame(const VP9_COMP *cpi) {
// If the current frame has no non-zero motion vector detected in the first
// pass, and neither do its previous and forward frames, then this frame
// can be skipped for partition check, and the partition size is assigned

View File

@ -137,7 +137,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m,
}
}
void vp9_rc_init_minq_luts() {
void vp9_rc_init_minq_luts(void) {
init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8,
arfgf_low_motion_minq_8, arfgf_high_motion_minq_8,
inter_minq_8, rtc_minq_8, VPX_BITS_8);

View File

@ -152,7 +152,7 @@ int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
void vp9_rc_init_minq_luts();
void vp9_rc_init_minq_luts(void);
// Generally at the high level, the following flow is expected
// to be enforced for rate control:

View File

@ -129,7 +129,7 @@ static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
}
}
void vp9_init_me_luts() {
void vp9_init_me_luts(void) {
init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH

View File

@ -150,7 +150,7 @@ int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
int ref_frame);
void vp9_init_me_luts();
void vp9_init_me_luts(void);
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,

View File

@ -428,7 +428,7 @@ static int get_down2_length(int length, int steps) {
return length;
}
int get_down2_steps(int in_length, int out_length) {
static int get_down2_steps(int in_length, int out_length) {
int steps = 0;
int proj_in_length;
while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) {

View File

@ -12,6 +12,7 @@
#include "vp9/common/vp9_entropy.h"
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_subexp.h"
#include "vp9/encoder/vp9_writer.h"
#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))

View File

@ -16,11 +16,15 @@
extern "C" {
#endif
void vp9_write_prob_diff_update(vp9_writer *w,
#include "vp9/common/vp9_prob.h"
struct vp9_writer;
void vp9_write_prob_diff_update(struct vp9_writer *w,
vp9_prob newp, vp9_prob oldp);
void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp,
unsigned int *ct);
void vp9_cond_prob_diff_update(struct vp9_writer *w, vp9_prob *oldp,
const unsigned int ct[2]);
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
vp9_prob oldp, vp9_prob *bestp,

View File

@ -23,6 +23,7 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_temporal_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
@ -110,7 +111,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
kernel, mv_precision_uv, x, y);
}
void vp9_temporal_filter_init() {
void vp9_temporal_filter_init(void) {
int i;
fixed_divide[0] = 0;

View File

@ -15,7 +15,7 @@
extern "C" {
#endif
void vp9_temporal_filter_init();
void vp9_temporal_filter_init(void);
void vp9_temporal_filter(VP9_COMP *cpi, int distance);
#ifdef __cplusplus

View File

@ -19,7 +19,7 @@
extern "C" {
#endif
typedef struct {
typedef struct vp9_writer {
unsigned int lowvalue;
unsigned int range;
int count;

View File

@ -9,6 +9,8 @@
*/
#include <emmintrin.h>
#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,

View File

@ -9,6 +9,8 @@
*/
#include <immintrin.h> // AVX2
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vpx_ports/mem.h"

View File

@ -9,6 +9,8 @@
*/
#include <emmintrin.h> // SSE2
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/x86/vp9_dct_sse2.h"
#include "vp9/encoder/vp9_dct.h"

View File

@ -9,6 +9,8 @@
*/
#include <emmintrin.h> // SSE2
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/x86/vp9_dct_sse2.h"

View File

@ -10,6 +10,8 @@
#include <assert.h>
#include <emmintrin.h> // SSE2
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/x86/vp9_dct_sse2.h"
@ -96,7 +98,7 @@ static INLINE void transpose_4x4(__m128i *res) {
res[3] = _mm_unpackhi_epi64(res[2], res[2]);
}
void fdct4_sse2(__m128i *in) {
static void fdct4_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@ -129,7 +131,7 @@ void fdct4_sse2(__m128i *in) {
transpose_4x4(in);
}
void fadst4_sse2(__m128i *in) {
static void fadst4_sse2(__m128i *in) {
const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@ -831,7 +833,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
// 07 17 27 37 47 57 67 77
}
void fdct8_sse2(__m128i *in) {
static void fdct8_sse2(__m128i *in) {
// constants
const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@ -971,7 +973,7 @@ void fdct8_sse2(__m128i *in) {
array_transpose_8x8(in, in);
}
void fadst8_sse2(__m128i *in) {
static void fadst8_sse2(__m128i *in) {
// Constants
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
@ -1353,7 +1355,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
right_shift_8x8(res1 + 8, 2);
}
void fdct16_8col(__m128i *in) {
static void fdct16_8col(__m128i *in) {
// perform 16x16 1-D DCT for 8 columns
__m128i i[8], s[8], p[8], t[8], u[16], v[16];
const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
@ -1675,7 +1677,7 @@ void fdct16_8col(__m128i *in) {
in[15] = _mm_packs_epi32(v[14], v[15]);
}
void fadst16_8col(__m128i *in) {
static void fadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@ -2145,13 +2147,13 @@ void fadst16_8col(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
void fdct16_sse2(__m128i *in0, __m128i *in1) {
static void fdct16_sse2(__m128i *in0, __m128i *in1) {
fdct16_8col(in0);
fdct16_8col(in1);
array_transpose_16x16(in0, in1);
}
void fadst16_sse2(__m128i *in0, __m128i *in1) {
static void fadst16_sse2(__m128i *in0, __m128i *in1) {
fadst16_8col(in0);
fadst16_8col(in1);
array_transpose_16x16(in0, in1);
@ -2334,7 +2336,7 @@ void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output,
}
}
void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vp9_highbd_fdct16x16_sse2(input, output, stride);

View File

@ -15,6 +15,8 @@
#include <math.h>
#endif
#include <tmmintrin.h> // SSSE3
#include "./vp9_rtcd.h"
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,

View File

@ -9,8 +9,9 @@
*/
#include <immintrin.h> // AVX2
#include "vpx/vpx_integer.h"
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
int64_t vp9_block_error_avx2(const int16_t *coeff,
const int16_t *dqcoeff,

View File

@ -11,6 +11,7 @@
#include <emmintrin.h>
#include <xmmintrin.h>
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,

View File

@ -9,6 +9,8 @@
*/
#include <immintrin.h> // AVX2
#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
#include "vp9/encoder/vp9_variance.h"

View File

@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"

View File

@ -10,6 +10,8 @@
#include <immintrin.h> // AVX2
#include "./vp9_rtcd.h"
void vp9_get16x16var_avx2(const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,

View File

@ -10,14 +10,15 @@
#include <emmintrin.h> // SSE2
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
#include "vpx_ports/mem.h"
typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
const unsigned char *ref, int ref_stride,
unsigned int *sse, int *sum);
typedef void (*variance_fn_t)(const unsigned char *src, int src_stride,
const unsigned char *ref, int ref_stride,
unsigned int *sse, int *sum);
unsigned int vp9_get_mb_ss_sse2(const int16_t *src) {
__m128i vsum = _mm_setzero_si128();
@ -38,9 +39,9 @@ unsigned int vp9_get_mb_ss_sse2(const int16_t *src) {
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
_mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
unsigned int vp9_get4x4var_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum) {
static void get4x4var_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum) {
const __m128i zero = _mm_setzero_si128();
const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero);
const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero);
@ -62,13 +63,11 @@ unsigned int vp9_get4x4var_sse2(const uint8_t *src, int src_stride,
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
*sse = _mm_cvtsi128_si32(vsum);
return 0;
}
unsigned int vp9_get8x8var_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum) {
void vp9_get8x8var_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum) {
const __m128i zero = _mm_setzero_si128();
__m128i vsum = _mm_setzero_si128();
__m128i vsse = _mm_setzero_si128();
@ -103,13 +102,11 @@ unsigned int vp9_get8x8var_sse2(const uint8_t *src, int src_stride,
vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
*sse = _mm_cvtsi128_si32(vsse);
return 0;
}
unsigned int vp9_get16x16var_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum) {
void vp9_get16x16var_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum) {
const __m128i zero = _mm_setzero_si128();
__m128i vsum = _mm_setzero_si128();
__m128i vsse = _mm_setzero_si128();
@ -146,8 +143,6 @@ unsigned int vp9_get16x16var_sse2(const uint8_t *src, int src_stride,
vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
*sse = _mm_cvtsi128_si32(vsse);
return 0;
}
@ -176,7 +171,7 @@ unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride,
const unsigned char *ref, int ref_stride,
unsigned int *sse) {
int sum;
vp9_get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
return *sse - (((unsigned int)sum * sum) >> 4);
}
@ -185,7 +180,7 @@ unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride,
unsigned int *sse) {
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
sse, &sum, vp9_get4x4var_sse2, 4);
sse, &sum, get4x4var_sse2, 4);
return *sse - (((unsigned int)sum * sum) >> 5);
}
@ -194,7 +189,7 @@ unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride,
unsigned int *sse) {
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
sse, &sum, vp9_get4x4var_sse2, 4);
sse, &sum, get4x4var_sse2, 4);
return *sse - (((unsigned int)sum * sum) >> 5);
}