vpx/vp9/common/x86/vp9_recon_sse2.asm
Ronald S. Bultje 8350e7fe38 Make intra prediction pointers RTCD-based.
This probably has a mildly negative impact on performance, but will
(in future commits - or possibly merged with this one) allow SIMD
implementations of individual intra prediction functions. We may
perhaps want to consider having separate functions per txfm-size
also (i.e. 4x4, 8x8, 16x16 and 32x32 intra prediction functions for
each intra prediction mode), but I haven't played much with that
yet.

Change-Id: Ie739985eee0a3fcbb7aed29ee6910fdb653ea269
2013-07-08 17:25:51 -07:00

116 lines
3.0 KiB
NASM

;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; void vp9_copy_mem16x16_sse2(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies a block of 16 rows x 16 bytes from src to dst.
;   - Source rows are read with movdqu, so src needs no alignment.
;   - Destination rows are written with movdqa, so every destination
;     row address (dst + n * dst_stride, n = 0..15) must be 16-byte
;     aligned or the store faults.
;   - Both strides are sign-extended from their 32-bit argument values.
;
; Register roles:
;   rsi = current source row          rax = src_stride
;   rdi = current destination row     rcx = dst_stride
;   xmm0-xmm2 = the rows currently in flight
;
; rsi and rdi are saved/restored around the body; rax, rcx, xmm0-xmm2
; and the flags are clobbered.
;-----------------------------------------------------------------------
global sym(vp9_copy_mem16x16_sse2) PRIVATE
sym(vp9_copy_mem16x16_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; Rows 0..14: five fully unrolled groups of three rows. Each group
    ; loads all three rows before storing any of them, then steps both
    ; row pointers forward by three strides.
%rep 5
    movdqu      xmm0, [rsi]
    movdqu      xmm1, [rsi+rax]
    movdqu      xmm2, [rsi+rax*2]
    lea         rsi, [rsi+rax*2]
    add         rsi, rax                    ; rsi += 3 * src_stride

    movdqa      [rdi], xmm0
    movdqa      [rdi+rcx], xmm1
    movdqa      [rdi+rcx*2], xmm2
    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx                    ; rdi += 3 * dst_stride
%endrep

    ; Row 15: rsi/rdi now point at the last row of each block.
    movdqu      xmm0, [rsi]
    movdqa      [rdi], xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret