Merge "BUG FIX: sse2 subpel variance is not PIC compliant"
This commit is contained in:
commit
15b261d854
@ -91,7 +91,7 @@ SECTION .text
|
|||||||
%define filter_idx_shift 5
|
%define filter_idx_shift 5
|
||||||
|
|
||||||
|
|
||||||
%ifdef PIC ; 64bit PIC
|
%if ARCH_X86_64
|
||||||
%if %2 == 1 ; avg
|
%if %2 == 1 ; avg
|
||||||
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
|
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
|
||||||
x_offset, y_offset, \
|
x_offset, y_offset, \
|
||||||
@ -99,19 +99,20 @@ SECTION .text
|
|||||||
sec, sec_stride, height, sse
|
sec, sec_stride, height, sse
|
||||||
%define sec_str sec_strideq
|
%define sec_str sec_strideq
|
||||||
%else
|
%else
|
||||||
cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
|
cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
|
||||||
y_offset, dst, dst_stride, height, sse
|
x_offset, y_offset, \
|
||||||
|
dst, dst_stride, height, sse
|
||||||
%endif
|
%endif
|
||||||
%define block_height heightd
|
%define block_height heightd
|
||||||
%define bilin_filter sseq
|
%define bilin_filter sseq
|
||||||
%else
|
%else
|
||||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
%if CONFIG_PIC=1
|
||||||
%if %2 == 1 ; avg
|
%if %2 == 1 ; avg
|
||||||
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
|
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
x_offset, y_offset, \
|
x_offset, y_offset, \
|
||||||
dst, dst_stride, \
|
dst, dst_stride, \
|
||||||
sec, sec_stride, \
|
sec, sec_stride, height, sse, \
|
||||||
height, sse, g_bilin_filter, g_pw_8
|
g_bilin_filter, g_pw_8
|
||||||
%define block_height dword heightm
|
%define block_height dword heightm
|
||||||
%define sec_str sec_stridemp
|
%define sec_str sec_stridemp
|
||||||
|
|
||||||
@ -130,8 +131,9 @@ SECTION .text
|
|||||||
LOAD_IF_USED 0, 1 ; load eax, ecx back
|
LOAD_IF_USED 0, 1 ; load eax, ecx back
|
||||||
%else
|
%else
|
||||||
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
|
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
x_offset, y_offset, dst, dst_stride, height, \
|
x_offset, y_offset, \
|
||||||
sse, g_bilin_filter, g_pw_8
|
dst, dst_stride, height, sse, \
|
||||||
|
g_bilin_filter, g_pw_8
|
||||||
%define block_height heightd
|
%define block_height heightd
|
||||||
|
|
||||||
; Store bilin_filter and pw_8 location in stack
|
; Store bilin_filter and pw_8 location in stack
|
||||||
@ -150,22 +152,16 @@ SECTION .text
|
|||||||
%endif
|
%endif
|
||||||
%else
|
%else
|
||||||
%if %2 == 1 ; avg
|
%if %2 == 1 ; avg
|
||||||
cglobal highbd_sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
|
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
7 + 2 * ARCH_X86_64, 13, src, src_stride, \
|
x_offset, y_offset, \
|
||||||
x_offset, y_offset, \
|
dst, dst_stride, \
|
||||||
dst, dst_stride, \
|
sec, sec_stride, height, sse
|
||||||
sec, sec_stride, \
|
|
||||||
height, sse
|
|
||||||
%if ARCH_X86_64
|
|
||||||
%define block_height heightd
|
|
||||||
%define sec_str sec_strideq
|
|
||||||
%else
|
|
||||||
%define block_height dword heightm
|
%define block_height dword heightm
|
||||||
%define sec_str sec_stridemp
|
%define sec_str sec_stridemp
|
||||||
%endif
|
|
||||||
%else
|
%else
|
||||||
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
|
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
x_offset, y_offset, dst, dst_stride, height, sse
|
x_offset, y_offset, \
|
||||||
|
dst, dst_stride, height, sse
|
||||||
%define block_height heightd
|
%define block_height heightd
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -284,14 +280,14 @@ SECTION .text
|
|||||||
|
|
||||||
.x_zero_y_nonhalf:
|
.x_zero_y_nonhalf:
|
||||||
; x_offset == 0 && y_offset == bilin interpolation
|
; x_offset == 0 && y_offset == bilin interpolation
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl y_offsetd, filter_idx_shift
|
shl y_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && mmsize == 16
|
%if ARCH_X86_64 && mmsize == 16
|
||||||
mova m8, [bilin_filter+y_offsetq]
|
mova m8, [bilin_filter+y_offsetq]
|
||||||
mova m9, [bilin_filter+y_offsetq+16]
|
mova m9, [bilin_filter+y_offsetq+16]
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_y_a m8
|
%define filter_y_a m8
|
||||||
%define filter_y_b m9
|
%define filter_y_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -308,7 +304,7 @@ SECTION .text
|
|||||||
add y_offsetq, bilin_filter
|
add y_offsetq, bilin_filter
|
||||||
%define filter_y_a [y_offsetq]
|
%define filter_y_a [y_offsetq]
|
||||||
%define filter_y_b [y_offsetq+16]
|
%define filter_y_b [y_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -511,14 +507,14 @@ SECTION .text
|
|||||||
|
|
||||||
.x_half_y_nonhalf:
|
.x_half_y_nonhalf:
|
||||||
; x_offset == 0.5 && y_offset == bilin interpolation
|
; x_offset == 0.5 && y_offset == bilin interpolation
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl y_offsetd, filter_idx_shift
|
shl y_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && mmsize == 16
|
%if ARCH_X86_64 && mmsize == 16
|
||||||
mova m8, [bilin_filter+y_offsetq]
|
mova m8, [bilin_filter+y_offsetq]
|
||||||
mova m9, [bilin_filter+y_offsetq+16]
|
mova m9, [bilin_filter+y_offsetq+16]
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_y_a m8
|
%define filter_y_a m8
|
||||||
%define filter_y_b m9
|
%define filter_y_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -535,7 +531,7 @@ SECTION .text
|
|||||||
add y_offsetq, bilin_filter
|
add y_offsetq, bilin_filter
|
||||||
%define filter_y_a [y_offsetq]
|
%define filter_y_a [y_offsetq]
|
||||||
%define filter_y_b [y_offsetq+16]
|
%define filter_y_b [y_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -633,14 +629,14 @@ SECTION .text
|
|||||||
jnz .x_nonhalf_y_nonzero
|
jnz .x_nonhalf_y_nonzero
|
||||||
|
|
||||||
; x_offset == bilin interpolation && y_offset == 0
|
; x_offset == bilin interpolation && y_offset == 0
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl x_offsetd, filter_idx_shift
|
shl x_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && mmsize == 16
|
%if ARCH_X86_64 && mmsize == 16
|
||||||
mova m8, [bilin_filter+x_offsetq]
|
mova m8, [bilin_filter+x_offsetq]
|
||||||
mova m9, [bilin_filter+x_offsetq+16]
|
mova m9, [bilin_filter+x_offsetq+16]
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_x_a m8
|
%define filter_x_a m8
|
||||||
%define filter_x_b m9
|
%define filter_x_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -657,7 +653,7 @@ SECTION .text
|
|||||||
add x_offsetq, bilin_filter
|
add x_offsetq, bilin_filter
|
||||||
%define filter_x_a [x_offsetq]
|
%define filter_x_a [x_offsetq]
|
||||||
%define filter_x_b [x_offsetq+16]
|
%define filter_x_b [x_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -732,14 +728,14 @@ SECTION .text
|
|||||||
jne .x_nonhalf_y_nonhalf
|
jne .x_nonhalf_y_nonhalf
|
||||||
|
|
||||||
; x_offset == bilin interpolation && y_offset == 0.5
|
; x_offset == bilin interpolation && y_offset == 0.5
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl x_offsetd, filter_idx_shift
|
shl x_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && mmsize == 16
|
%if ARCH_X86_64 && mmsize == 16
|
||||||
mova m8, [bilin_filter+x_offsetq]
|
mova m8, [bilin_filter+x_offsetq]
|
||||||
mova m9, [bilin_filter+x_offsetq+16]
|
mova m9, [bilin_filter+x_offsetq+16]
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_x_a m8
|
%define filter_x_a m8
|
||||||
%define filter_x_b m9
|
%define filter_x_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -756,7 +752,7 @@ SECTION .text
|
|||||||
add x_offsetq, bilin_filter
|
add x_offsetq, bilin_filter
|
||||||
%define filter_x_a [x_offsetq]
|
%define filter_x_a [x_offsetq]
|
||||||
%define filter_x_b [x_offsetq+16]
|
%define filter_x_b [x_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -859,8 +855,8 @@ SECTION .text
|
|||||||
|
|
||||||
.x_nonhalf_y_nonhalf:
|
.x_nonhalf_y_nonhalf:
|
||||||
; loading filter - this is same as in 8-bit depth
|
; loading filter - this is same as in 8-bit depth
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
|
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
|
||||||
shl y_offsetd, filter_idx_shift
|
shl y_offsetd, filter_idx_shift
|
||||||
@ -869,7 +865,7 @@ SECTION .text
|
|||||||
mova m9, [bilin_filter+x_offsetq+16]
|
mova m9, [bilin_filter+x_offsetq+16]
|
||||||
mova m10, [bilin_filter+y_offsetq]
|
mova m10, [bilin_filter+y_offsetq]
|
||||||
mova m11, [bilin_filter+y_offsetq+16]
|
mova m11, [bilin_filter+y_offsetq+16]
|
||||||
mova m12, [pw_8]
|
mova m12, [GLOBAL(pw_8)]
|
||||||
%define filter_x_a m8
|
%define filter_x_a m8
|
||||||
%define filter_x_b m9
|
%define filter_x_b m9
|
||||||
%define filter_y_a m10
|
%define filter_y_a m10
|
||||||
@ -897,7 +893,7 @@ SECTION .text
|
|||||||
%define filter_x_b [x_offsetq+16]
|
%define filter_x_b [x_offsetq+16]
|
||||||
%define filter_y_a [y_offsetq]
|
%define filter_y_a [y_offsetq]
|
||||||
%define filter_y_b [y_offsetq+16]
|
%define filter_y_b [y_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
; end of load filter
|
; end of load filter
|
||||||
|
@ -114,27 +114,26 @@ SECTION .text
|
|||||||
; 11, not 13, if the registers are ordered correctly. May make a minor speed
|
; 11, not 13, if the registers are ordered correctly. May make a minor speed
|
||||||
; difference on Win64
|
; difference on Win64
|
||||||
|
|
||||||
%ifdef PIC ; 64bit PIC
|
%if ARCH_X86_64
|
||||||
%if %2 == 1 ; avg
|
%if %2 == 1 ; avg
|
||||||
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
|
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
|
||||||
x_offset, y_offset, \
|
x_offset, y_offset, dst, dst_stride, \
|
||||||
dst, dst_stride, \
|
sec, sec_stride, height, sse
|
||||||
sec, sec_stride, height, sse
|
|
||||||
%define sec_str sec_strideq
|
%define sec_str sec_strideq
|
||||||
%else
|
%else
|
||||||
cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
|
cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
|
||||||
y_offset, dst, dst_stride, height, sse
|
x_offset, y_offset, dst, dst_stride, \
|
||||||
|
height, sse
|
||||||
%endif
|
%endif
|
||||||
%define block_height heightd
|
%define block_height heightd
|
||||||
%define bilin_filter sseq
|
%define bilin_filter sseq
|
||||||
%else
|
%else
|
||||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
%if CONFIG_PIC=1
|
||||||
%if %2 == 1 ; avg
|
%if %2 == 1 ; avg
|
||||||
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
|
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
x_offset, y_offset, \
|
x_offset, y_offset, dst, dst_stride, \
|
||||||
dst, dst_stride, \
|
sec, sec_stride, height, sse, \
|
||||||
sec, sec_stride, \
|
g_bilin_filter, g_pw_8
|
||||||
height, sse, g_bilin_filter, g_pw_8
|
|
||||||
%define block_height dword heightm
|
%define block_height dword heightm
|
||||||
%define sec_str sec_stridemp
|
%define sec_str sec_stridemp
|
||||||
|
|
||||||
@ -152,9 +151,9 @@ SECTION .text
|
|||||||
|
|
||||||
LOAD_IF_USED 0, 1 ; load eax, ecx back
|
LOAD_IF_USED 0, 1 ; load eax, ecx back
|
||||||
%else
|
%else
|
||||||
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
|
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
y_offset, dst, dst_stride, height, sse, \
|
x_offset, y_offset, dst, dst_stride, \
|
||||||
g_bilin_filter, g_pw_8
|
height, sse, g_bilin_filter, g_pw_8
|
||||||
%define block_height heightd
|
%define block_height heightd
|
||||||
|
|
||||||
;Store bilin_filter and pw_8 location in stack
|
;Store bilin_filter and pw_8 location in stack
|
||||||
@ -173,25 +172,18 @@ SECTION .text
|
|||||||
%endif
|
%endif
|
||||||
%else
|
%else
|
||||||
%if %2 == 1 ; avg
|
%if %2 == 1 ; avg
|
||||||
cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
|
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
7 + 2 * ARCH_X86_64, 13, src, src_stride, \
|
x_offset, y_offset, \
|
||||||
x_offset, y_offset, \
|
dst, dst_stride, sec, sec_stride, \
|
||||||
dst, dst_stride, \
|
height, sse
|
||||||
sec, sec_stride, \
|
|
||||||
height, sse
|
|
||||||
%if ARCH_X86_64
|
|
||||||
%define block_height heightd
|
|
||||||
%define sec_str sec_strideq
|
|
||||||
%else
|
|
||||||
%define block_height dword heightm
|
%define block_height dword heightm
|
||||||
%define sec_str sec_stridemp
|
%define sec_str sec_stridemp
|
||||||
%endif
|
|
||||||
%else
|
%else
|
||||||
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
|
cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
|
||||||
y_offset, dst, dst_stride, height, sse
|
x_offset, y_offset, dst, dst_stride, \
|
||||||
|
height, sse
|
||||||
%define block_height heightd
|
%define block_height heightd
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%define bilin_filter bilin_filter_m
|
%define bilin_filter bilin_filter_m
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
@ -371,8 +363,8 @@ SECTION .text
|
|||||||
|
|
||||||
.x_zero_y_nonhalf:
|
.x_zero_y_nonhalf:
|
||||||
; x_offset == 0 && y_offset == bilin interpolation
|
; x_offset == 0 && y_offset == bilin interpolation
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl y_offsetd, filter_idx_shift
|
shl y_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && %1 > 4
|
%if ARCH_X86_64 && %1 > 4
|
||||||
@ -380,7 +372,7 @@ SECTION .text
|
|||||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||||
mova m9, [bilin_filter+y_offsetq+16]
|
mova m9, [bilin_filter+y_offsetq+16]
|
||||||
%endif
|
%endif
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_y_a m8
|
%define filter_y_a m8
|
||||||
%define filter_y_b m9
|
%define filter_y_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -397,7 +389,7 @@ SECTION .text
|
|||||||
add y_offsetq, bilin_filter
|
add y_offsetq, bilin_filter
|
||||||
%define filter_y_a [y_offsetq]
|
%define filter_y_a [y_offsetq]
|
||||||
%define filter_y_b [y_offsetq+16]
|
%define filter_y_b [y_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -694,8 +686,8 @@ SECTION .text
|
|||||||
|
|
||||||
.x_half_y_nonhalf:
|
.x_half_y_nonhalf:
|
||||||
; x_offset == 0.5 && y_offset == bilin interpolation
|
; x_offset == 0.5 && y_offset == bilin interpolation
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl y_offsetd, filter_idx_shift
|
shl y_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && %1 > 4
|
%if ARCH_X86_64 && %1 > 4
|
||||||
@ -703,7 +695,7 @@ SECTION .text
|
|||||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||||
mova m9, [bilin_filter+y_offsetq+16]
|
mova m9, [bilin_filter+y_offsetq+16]
|
||||||
%endif
|
%endif
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_y_a m8
|
%define filter_y_a m8
|
||||||
%define filter_y_b m9
|
%define filter_y_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -720,7 +712,7 @@ SECTION .text
|
|||||||
add y_offsetq, bilin_filter
|
add y_offsetq, bilin_filter
|
||||||
%define filter_y_a [y_offsetq]
|
%define filter_y_a [y_offsetq]
|
||||||
%define filter_y_b [y_offsetq+16]
|
%define filter_y_b [y_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -852,8 +844,8 @@ SECTION .text
|
|||||||
jnz .x_nonhalf_y_nonzero
|
jnz .x_nonhalf_y_nonzero
|
||||||
|
|
||||||
; x_offset == bilin interpolation && y_offset == 0
|
; x_offset == bilin interpolation && y_offset == 0
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl x_offsetd, filter_idx_shift
|
shl x_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && %1 > 4
|
%if ARCH_X86_64 && %1 > 4
|
||||||
@ -861,7 +853,7 @@ SECTION .text
|
|||||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||||
mova m9, [bilin_filter+x_offsetq+16]
|
mova m9, [bilin_filter+x_offsetq+16]
|
||||||
%endif
|
%endif
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_x_a m8
|
%define filter_x_a m8
|
||||||
%define filter_x_b m9
|
%define filter_x_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -878,7 +870,7 @@ SECTION .text
|
|||||||
add x_offsetq, bilin_filter
|
add x_offsetq, bilin_filter
|
||||||
%define filter_x_a [x_offsetq]
|
%define filter_x_a [x_offsetq]
|
||||||
%define filter_x_b [x_offsetq+16]
|
%define filter_x_b [x_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -994,8 +986,8 @@ SECTION .text
|
|||||||
jne .x_nonhalf_y_nonhalf
|
jne .x_nonhalf_y_nonhalf
|
||||||
|
|
||||||
; x_offset == bilin interpolation && y_offset == 0.5
|
; x_offset == bilin interpolation && y_offset == 0.5
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl x_offsetd, filter_idx_shift
|
shl x_offsetd, filter_idx_shift
|
||||||
%if ARCH_X86_64 && %1 > 4
|
%if ARCH_X86_64 && %1 > 4
|
||||||
@ -1003,7 +995,7 @@ SECTION .text
|
|||||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||||
mova m9, [bilin_filter+x_offsetq+16]
|
mova m9, [bilin_filter+x_offsetq+16]
|
||||||
%endif
|
%endif
|
||||||
mova m10, [pw_8]
|
mova m10, [GLOBAL(pw_8)]
|
||||||
%define filter_x_a m8
|
%define filter_x_a m8
|
||||||
%define filter_x_b m9
|
%define filter_x_b m9
|
||||||
%define filter_rnd m10
|
%define filter_rnd m10
|
||||||
@ -1020,7 +1012,7 @@ SECTION .text
|
|||||||
add x_offsetq, bilin_filter
|
add x_offsetq, bilin_filter
|
||||||
%define filter_x_a [x_offsetq]
|
%define filter_x_a [x_offsetq]
|
||||||
%define filter_x_b [x_offsetq+16]
|
%define filter_x_b [x_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -1192,8 +1184,8 @@ SECTION .text
|
|||||||
STORE_AND_RET %1
|
STORE_AND_RET %1
|
||||||
|
|
||||||
.x_nonhalf_y_nonhalf:
|
.x_nonhalf_y_nonhalf:
|
||||||
%ifdef PIC
|
%if ARCH_X86_64
|
||||||
lea bilin_filter, [bilin_filter_m]
|
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||||
%endif
|
%endif
|
||||||
shl x_offsetd, filter_idx_shift
|
shl x_offsetd, filter_idx_shift
|
||||||
shl y_offsetd, filter_idx_shift
|
shl y_offsetd, filter_idx_shift
|
||||||
@ -1206,7 +1198,7 @@ SECTION .text
|
|||||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||||
mova m11, [bilin_filter+y_offsetq+16]
|
mova m11, [bilin_filter+y_offsetq+16]
|
||||||
%endif
|
%endif
|
||||||
mova m12, [pw_8]
|
mova m12, [GLOBAL(pw_8)]
|
||||||
%define filter_x_a m8
|
%define filter_x_a m8
|
||||||
%define filter_x_b m9
|
%define filter_x_b m9
|
||||||
%define filter_y_a m10
|
%define filter_y_a m10
|
||||||
@ -1234,7 +1226,7 @@ SECTION .text
|
|||||||
%define filter_x_b [x_offsetq+16]
|
%define filter_x_b [x_offsetq+16]
|
||||||
%define filter_y_a [y_offsetq]
|
%define filter_y_a [y_offsetq]
|
||||||
%define filter_y_b [y_offsetq+16]
|
%define filter_y_b [y_offsetq+16]
|
||||||
%define filter_rnd [pw_8]
|
%define filter_rnd [GLOBAL(pw_8)]
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user