SSE/SSE2 assembly for 4x4/8x8/16x16/32x32 V intra prediction.
Change-Id: I55a6cfa2daba738cbc0c4a02f806893f7e556997
This commit is contained in:
@@ -74,7 +74,7 @@ prototype void vp9_d153_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, u
|
|||||||
specialize vp9_d153_predictor_4x4
|
specialize vp9_d153_predictor_4x4
|
||||||
|
|
||||||
prototype void vp9_v_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_v_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_v_predictor_4x4
|
specialize vp9_v_predictor_4x4 sse
|
||||||
|
|
||||||
prototype void vp9_tm_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_tm_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_tm_predictor_4x4
|
specialize vp9_tm_predictor_4x4
|
||||||
@@ -113,7 +113,7 @@ prototype void vp9_d153_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, u
|
|||||||
specialize vp9_d153_predictor_8x8
|
specialize vp9_d153_predictor_8x8
|
||||||
|
|
||||||
prototype void vp9_v_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_v_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_v_predictor_8x8
|
specialize vp9_v_predictor_8x8 sse
|
||||||
|
|
||||||
prototype void vp9_tm_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_tm_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_tm_predictor_8x8
|
specialize vp9_tm_predictor_8x8
|
||||||
@@ -152,7 +152,7 @@ prototype void vp9_d153_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
|
|||||||
specialize vp9_d153_predictor_16x16
|
specialize vp9_d153_predictor_16x16
|
||||||
|
|
||||||
prototype void vp9_v_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_v_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_v_predictor_16x16
|
specialize vp9_v_predictor_16x16 sse2
|
||||||
|
|
||||||
prototype void vp9_tm_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_tm_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_tm_predictor_16x16
|
specialize vp9_tm_predictor_16x16
|
||||||
@@ -191,7 +191,7 @@ prototype void vp9_d153_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride,
|
|||||||
specialize vp9_d153_predictor_32x32
|
specialize vp9_d153_predictor_32x32
|
||||||
|
|
||||||
prototype void vp9_v_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_v_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_v_predictor_32x32
|
specialize vp9_v_predictor_32x32 sse2
|
||||||
|
|
||||||
prototype void vp9_tm_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
prototype void vp9_tm_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col"
|
||||||
specialize vp9_tm_predictor_32x32
|
specialize vp9_tm_predictor_32x32
|
||||||
|
@@ -125,3 +125,66 @@ cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
|
|||||||
dec lines4d
|
dec lines4d
|
||||||
jnz .loop
|
jnz .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
; Vertical ("V") intra predictor, 4x4 block.
; Matching C prototype (vp9_v_predictor_4x4):
;   void (uint8_t *ypred_ptr, ptrdiff_t y_stride,
;         uint8_t *yabove_row, uint8_t *yleft_col)
; Only the first three arguments are loaded; the left column is never read.
; The 4 bytes read from `above` are written unchanged to each of the 4 rows
; of `dst`, rows being `stride` bytes apart.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
    movd                      m0, [aboveq]      ; m0 = the 4 bytes above the block
    ; `above` is not needed past this point, so its register is re-aliased to
    ; hold stride * 3 (same trick the 8x8 version uses).
    DEFINE_ARGS dst, stride, stride3
    lea                 stride3q, [strideq*3]
    movd    [dstq            ], m0              ; row 0
    movd    [dstq+strideq    ], m0              ; row 1
    movd    [dstq+strideq*2  ], m0              ; row 2
    movd    [dstq+stride3q   ], m0              ; row 3
    RET
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
; Vertical ("V") intra predictor, 8x8 block.
; Matching C prototype (vp9_v_predictor_8x8):
;   void (uint8_t *ypred_ptr, ptrdiff_t y_stride,
;         uint8_t *yabove_row, uint8_t *yleft_col)
; Only the first three arguments are loaded; the left column is never read.
; The 8 bytes read from `above` are written unchanged to each of the 8 rows
; of `dst`, rows being `stride` bytes apart.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
    movq                      m0, [aboveq]      ; m0 = the 8 bytes above the block

    ; `above` has been consumed; re-alias its register as stride * 3 so four
    ; rows can be addressed from a single base pointer.
    DEFINE_ARGS dst, stride, stride_x3
    lea               stride_x3q, [strideq*3]

    ; rows 0..3
    movq    [dstq            ], m0
    movq    [dstq+strideq    ], m0
    movq    [dstq+strideq*2  ], m0
    movq    [dstq+stride_x3q ], m0

    lea                     dstq, [dstq+strideq*4]  ; step down to row 4

    ; rows 4..7
    movq    [dstq            ], m0
    movq    [dstq+strideq    ], m0
    movq    [dstq+strideq*2  ], m0
    movq    [dstq+stride_x3q ], m0
    RET
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
; Vertical ("V") intra predictor, 16x16 block.
; Matching C prototype (vp9_v_predictor_16x16):
;   void (uint8_t *ypred_ptr, ptrdiff_t y_stride,
;         uint8_t *yabove_row, uint8_t *yleft_col)
; Only the first three arguments are loaded; the left column is never read.
; The 16 bytes read from `above` are written unchanged to each of the 16 rows
; of `dst`: 4 loop iterations, 4 rows per iteration.
; NOTE(review): `mova` is used for both the load and the stores, so `above`
; and every destination row are presumably 16-byte aligned -- confirm with
; the callers.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
    mova                      m0, [aboveq]      ; m0 = the 16 bytes above the block

    ; `above` has been consumed; its register becomes stride * 3, and the 4th
    ; GPR counts the remaining groups of four rows.
    DEFINE_ARGS dst, stride, stride_x3, groups
    mov                  groupsd, 4             ; 4 groups * 4 rows = 16 rows
    lea               stride_x3q, [strideq*3]

.next_group:
    mova    [dstq            ], m0
    mova    [dstq+strideq    ], m0
    mova    [dstq+strideq*2  ], m0
    mova    [dstq+stride_x3q ], m0
    lea                     dstq, [dstq+strideq*4]  ; lea leaves flags untouched
    dec                  groupsd
    jnz .next_group
    REP_RET
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
; Vertical ("V") intra predictor, 32x32 block.
; Matching C prototype (vp9_v_predictor_32x32):
;   void (uint8_t *ypred_ptr, ptrdiff_t y_stride,
;         uint8_t *yabove_row, uint8_t *yleft_col)
; Only the first three arguments are loaded; the left column is never read.
; The 32 bytes read from `above` (two 16-byte halves) are written unchanged to
; each of the 32 rows of `dst`: 8 loop iterations, 4 rows per iteration.
; NOTE(review): `mova` is used for all loads and stores, so `above` and every
; destination row are presumably 16-byte aligned -- confirm with the callers.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
    mova                      m0, [aboveq]      ; left  half: bytes  0..15
    mova                      m1, [aboveq+16]   ; right half: bytes 16..31

    ; `above` has been consumed; its register becomes stride * 3, and the 4th
    ; GPR counts the remaining groups of four rows.
    DEFINE_ARGS dst, stride, stride_x3, groups
    mov                  groupsd, 8             ; 8 groups * 4 rows = 32 rows
    lea               stride_x3q, [strideq*3]

.next_group:
    ; row 0 of this group
    mova    [dstq               ], m0
    mova    [dstq            +16], m1
    ; row 1
    mova    [dstq+strideq       ], m0
    mova    [dstq+strideq    +16], m1
    ; row 2
    mova    [dstq+strideq*2     ], m0
    mova    [dstq+strideq*2  +16], m1
    ; row 3
    mova    [dstq+stride_x3q    ], m0
    mova    [dstq+stride_x3q +16], m1

    lea                     dstq, [dstq+strideq*4]  ; lea leaves flags untouched
    dec                  groupsd
    jnz .next_group
    REP_RET
|
||||||
|
Reference in New Issue
Block a user