This commit converts all SAD result pointers to uint32_t.
The sse4_1 code used uint16_t for returning the SAD, but that won't work for 32x32 or 64x64 blocks. This code fixes the assembly for those and also re-enables sse4_1 on Linux. Change-Id: I5ce7288d581db870a148e5f7c5092826f59edd81
This commit is contained in:
parent
b715e371c0
commit
714aa9f3c0
@ -997,17 +997,6 @@ process_common_toolchain() {
|
||||
#error "not x32"
|
||||
#endif
|
||||
EOF
|
||||
soft_enable runtime_cpu_detect
|
||||
soft_enable mmx
|
||||
soft_enable sse
|
||||
soft_enable sse2
|
||||
soft_enable sse3
|
||||
soft_enable ssse3
|
||||
if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
|
||||
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
|
||||
else
|
||||
soft_enable sse4_1
|
||||
fi
|
||||
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
@ -1061,6 +1050,18 @@ EOF
|
||||
;;
|
||||
esac
|
||||
|
||||
soft_enable runtime_cpu_detect
|
||||
soft_enable mmx
|
||||
soft_enable sse
|
||||
soft_enable sse2
|
||||
soft_enable sse3
|
||||
soft_enable ssse3
|
||||
if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
|
||||
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
|
||||
else
|
||||
soft_enable sse4_1
|
||||
fi
|
||||
|
||||
case "${AS}" in
|
||||
auto|"")
|
||||
which nasm >/dev/null 2>&1 && AS=nasm
|
||||
|
@ -449,25 +449,25 @@ specialize vp9_sad8x8x3 sse3
|
||||
prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
|
||||
specialize vp9_sad4x4x3 sse3
|
||||
|
||||
prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad64x64x8
|
||||
|
||||
prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad32x32x8
|
||||
|
||||
prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad16x16x8 sse4
|
||||
|
||||
prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad16x8x8 sse4
|
||||
|
||||
prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad8x16x8 sse4
|
||||
|
||||
prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad8x8x8 sse4
|
||||
|
||||
prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
|
||||
prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
|
||||
specialize vp9_sad4x4x8 sse4
|
||||
|
||||
prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
|
||||
@ -490,7 +490,6 @@ specialize vp9_sad8x8x4d sse2
|
||||
|
||||
prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
|
||||
specialize vp9_sad4x4x4d sse
|
||||
|
||||
prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
|
||||
specialize vp9_sub_pixel_mse16x16 sse2 mmx
|
||||
|
||||
|
@ -1782,7 +1782,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, sad_array8, 8);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
|
||||
unsigned int sad_array[3];
|
||||
int_mv fcenter_mv;
|
||||
|
||||
|
@ -103,62 +103,62 @@ void vp9_sad64x64x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
unsigned int *sad_array) {
|
||||
sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad64x64(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad32x32x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
unsigned int *sad_array) {
|
||||
sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad32x32(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad16x16x3_c(const uint8_t *src_ptr,
|
||||
@ -178,31 +178,31 @@ void vp9_sad16x16x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
uint32_t *sad_array) {
|
||||
sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad16x16(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad16x8x3_c(const uint8_t *src_ptr,
|
||||
@ -222,31 +222,31 @@ void vp9_sad16x8x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
uint32_t *sad_array) {
|
||||
sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad16x8(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad8x8x3_c(const uint8_t *src_ptr,
|
||||
@ -266,31 +266,31 @@ void vp9_sad8x8x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
uint32_t *sad_array) {
|
||||
sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad8x8(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad8x16x3_c(const uint8_t *src_ptr,
|
||||
@ -310,31 +310,31 @@ void vp9_sad8x16x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
uint32_t *sad_array) {
|
||||
sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad8x16(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad4x4x3_c(const uint8_t *src_ptr,
|
||||
@ -354,31 +354,31 @@ void vp9_sad4x4x8_c(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
uint16_t *sad_array) {
|
||||
sad_array[0] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = (uint16_t)vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
uint32_t *sad_array) {
|
||||
sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 1, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 2, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 3, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[4] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 4, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[5] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 5, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[6] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 6, ref_stride,
|
||||
0x7fffffff);
|
||||
sad_array[7] = vp9_sad4x4(src_ptr, src_stride,
|
||||
ref_ptr + 7, ref_stride,
|
||||
0x7fffffff);
|
||||
}
|
||||
|
||||
void vp9_sad64x64x4d_c(const uint8_t *src_ptr,
|
||||
|
@ -29,7 +29,7 @@ typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr,
|
||||
int source_stride,
|
||||
const uint8_t *ref_ptr,
|
||||
int ref_stride,
|
||||
unsigned short *sad_array);
|
||||
unsigned int *sad_array);
|
||||
|
||||
typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
|
||||
int source_stride,
|
||||
|
@ -154,6 +154,16 @@
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
; WRITE_AS_INTS: store the eight 16-bit words held in xmm1 as eight
; 32-bit unsigned integers at the address passed in arg(4).
;   In:    xmm1 = eight packed 16-bit values (presumably the per-offset
;                 SAD accumulators built by the PROCESS_* macros - confirm)
;   Out:   [rdi .. rdi+31] = the same eight values, zero-extended to dwords
;   Clobb: rdi, xmm0, xmm1, xmm2
;   Note:  both stores use movdqa, so the destination must be 16-byte aligned.
%macro WRITE_AS_INTS 0
|
||||
mov rdi, arg(4) ;Results: destination pointer for the eight dword outputs
|
||||
pxor xmm0, xmm0 ; xmm0 = 0, used as the high half of every widened word
|
||||
movdqa xmm2, xmm1 ; keep a copy, the unpack below overwrites xmm1
|
||||
punpcklwd xmm1, xmm0 ; xmm1 = words 0..3 zero-extended to 32 bits
|
||||
punpckhwd xmm2, xmm0 ; xmm2 = words 4..7 zero-extended to 32 bits
|
||||
|
||||
movdqa [rdi], xmm1 ; results 0..3 (aligned 16-byte store)
|
||||
movdqa [rdi + 16], xmm2 ; results 4..7 (aligned 16-byte store)
|
||||
%endmacro
|
||||
|
||||
;void vp9_sad16x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
@ -170,23 +180,22 @@ sym(vp9_sad16x16x8_sse4):
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
WRITE_AS_INTS
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@ -212,19 +221,18 @@ sym(vp9_sad16x8x8_sse4):
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
WRITE_AS_INTS
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@ -250,19 +258,18 @@ sym(vp9_sad8x8x8_sse4):
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
WRITE_AS_INTS
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@ -288,22 +295,22 @@ sym(vp9_sad8x16x8_sse4):
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
WRITE_AS_INTS
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@ -329,17 +336,16 @@ sym(vp9_sad4x4x8_sse4):
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_4X2X8 1
|
||||
PROCESS_4X2X8 0
|
||||
PROCESS_4X2X8 1
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
WRITE_AS_INTS
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
|
Loading…
Reference in New Issue
Block a user