Adding sse2 variant for vp9_mse{8x8, 8x16, 16x8}.

Change-Id: I6786d25ce4f32b8d8912f2d239a45ca15b310c4b
This commit is contained in:
Dmitry Kovalev 2014-09-02 12:09:14 -07:00
parent ab73dba65f
commit 48197f0a70
2 changed files with 32 additions and 12 deletions

View File

@ -693,16 +693,16 @@ add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, cons
specialize qw/vp9_sad4x4x4d sse/;
# Prototype / specialization entries for the vp9_mse* family and vp9_get_mb_ss.
# The four vp9_mse* prototypes below share one signature:
#   unsigned int f(src_ptr, source_stride, ref_ptr, recon_stride, sse)
# NOTE(review): every vp9_mse* prototype is followed by TWO specialize lines in
# this listing. This looks like a diff whose +/- markers were stripped (old
# line first, new line second) -- confirm against the real file.
add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc";
specialize qw/vp9_mse16x16 sse2 avx2/;
add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse8x16/;
specialize qw/vp9_mse8x16 sse2/;
add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x8/;
specialize qw/vp9_mse16x8 sse2/;
add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_mse8x8/;
specialize qw/vp9_mse8x8 sse2/;
# vp9_get_mb_ss takes a single const int16_t * argument and has one
# specialize line (sse2) in this listing.
add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss sse2/;

View File

@ -217,14 +217,6 @@ unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride,
return *sse - (((unsigned int)sum * sum) >> 8);
}
// 16x16 MSE entry point (SSE2).
// Forwards src/ref and their strides to vp9_get16x16var_sse2, which receives
// the caller's sse pointer plus the address of a local sum. The sum is not
// used afterwards; the function returns the value found in *sse once the
// helper has run. No division by the pixel count is performed here.
unsigned int vp9_mse16x16_sse2(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride,
                               unsigned int *sse) {
  int discarded_sum;  // required by the helper's signature, never read here

  vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse,
                       &discarded_sum);

  return *sse;
}
unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
@ -279,6 +271,34 @@ unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride,
return *sse - (((int64_t)sum * sum) >> 11);
}
// 8x8 MSE entry point (SSE2).
// Delegates to vp9_variance8x8_sse2 with the caller's sse pointer, ignores
// the value that call returns, and returns whatever is stored in *sse
// afterwards. No division by the pixel count is performed here.
unsigned int vp9_mse8x8_sse2(const uint8_t *src, int src_stride,
                             const uint8_t *ref, int ref_stride,
                             unsigned int *sse) {
  // Only the *sse side output is wanted; the returned variance is dropped.
  (void)vp9_variance8x8_sse2(src, src_stride, ref, ref_stride, sse);

  return *sse;
}
// 8x16 MSE entry point (SSE2).
// Delegates to vp9_variance8x16_sse2 with the caller's sse pointer, ignores
// the value that call returns, and returns whatever is stored in *sse
// afterwards. No division by the pixel count is performed here.
unsigned int vp9_mse8x16_sse2(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned int *sse) {
  // Only the *sse side output is wanted; the returned variance is dropped.
  (void)vp9_variance8x16_sse2(src, src_stride, ref, ref_stride, sse);

  return *sse;
}
// 16x8 MSE entry point (SSE2).
// Delegates to vp9_variance16x8_sse2 with the caller's sse pointer, ignores
// the value that call returns, and returns whatever is stored in *sse
// afterwards. No division by the pixel count is performed here.
unsigned int vp9_mse16x8_sse2(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              unsigned int *sse) {
  // Only the *sse side output is wanted; the returned variance is dropped.
  (void)vp9_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);

  return *sse;
}
// 16x16 MSE entry point (SSE2).
// Delegates to vp9_variance16x16_sse2 with the caller's sse pointer, ignores
// the value that call returns, and returns whatever is stored in *sse
// afterwards. No division by the pixel count is performed here.
unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse) {
  // Only the *sse side output is wanted; the returned variance is dropped.
  (void)vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);

  return *sse;
}
#define DECL(w, opt) \
int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
ptrdiff_t src_stride, \