Merge "block error sse2: use tran_low_t"
This commit is contained in:
@@ -125,7 +125,7 @@ if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
|
|||||||
|
|
||||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||||
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
|
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
|
||||||
specialize qw/vp9_block_error avx2/;
|
specialize qw/vp9_block_error avx2 sse2/;
|
||||||
|
|
||||||
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
|
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
|
||||||
specialize qw/vp9_highbd_block_error sse2/;
|
specialize qw/vp9_highbd_block_error sse2/;
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
%if CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
%else
|
|
||||||
; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
|
; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
|
||||||
; int64_t *ssz)
|
; int64_t *ssz)
|
||||||
|
|
||||||
@@ -25,14 +23,14 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
|
|||||||
pxor m4, m4 ; sse accumulator
|
pxor m4, m4 ; sse accumulator
|
||||||
pxor m6, m6 ; ssz accumulator
|
pxor m6, m6 ; ssz accumulator
|
||||||
pxor m5, m5 ; dedicated zero register
|
pxor m5, m5 ; dedicated zero register
|
||||||
lea uqcq, [uqcq+sizeq*2]
|
|
||||||
lea dqcq, [dqcq+sizeq*2]
|
|
||||||
neg sizeq
|
|
||||||
.loop:
|
.loop:
|
||||||
mova m2, [uqcq+sizeq*2]
|
LOAD_TRAN_LOW 2, uqcq, 0
|
||||||
mova m0, [dqcq+sizeq*2]
|
LOAD_TRAN_LOW 0, dqcq, 0
|
||||||
mova m3, [uqcq+sizeq*2+mmsize]
|
LOAD_TRAN_LOW 3, uqcq, 8
|
||||||
mova m1, [dqcq+sizeq*2+mmsize]
|
LOAD_TRAN_LOW 1, dqcq, 8
|
||||||
|
INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16
|
||||||
|
INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16
|
||||||
|
sub sizeq, 16
|
||||||
psubw m0, m2
|
psubw m0, m2
|
||||||
psubw m1, m3
|
psubw m1, m3
|
||||||
; individual errors are max. 15bit+sign, so squares are 30bit, and
|
; individual errors are max. 15bit+sign, so squares are 30bit, and
|
||||||
@@ -58,8 +56,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
|
|||||||
punpckhdq m3, m5
|
punpckhdq m3, m5
|
||||||
paddq m6, m7
|
paddq m6, m7
|
||||||
paddq m6, m3
|
paddq m6, m3
|
||||||
add sizeq, mmsize
|
jg .loop
|
||||||
jl .loop
|
|
||||||
|
|
||||||
; accumulate horizontally and store in return value
|
; accumulate horizontally and store in return value
|
||||||
movhlps m5, m4
|
movhlps m5, m4
|
||||||
@@ -77,7 +74,6 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
|
|||||||
movd edx, m5
|
movd edx, m5
|
||||||
%endif
|
%endif
|
||||||
RET
|
RET
|
||||||
%endif ; CONFIG_VP9_HIGHBITDEPTH
|
|
||||||
|
|
||||||
; Compute the sum of squared difference between two tran_low_t vectors.
|
; Compute the sum of squared difference between two tran_low_t vectors.
|
||||||
; Vectors are converted (if necessary) to int16_t for calculations.
|
; Vectors are converted (if necessary) to int16_t for calculations.
|
||||||
|
|||||||
Reference in New Issue
Block a user