Compare commits
14 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4dae3ca262 | ||
![]() |
1a23086bc6 | ||
![]() |
363a67c601 | ||
![]() |
5205d299bb | ||
![]() |
2926571be6 | ||
![]() |
e343988f9d | ||
![]() |
cab6ac16e0 | ||
![]() |
b073e3cdd4 | ||
![]() |
dd6134b472 | ||
![]() |
fc2fc899ae | ||
![]() |
f78e5a04e6 | ||
![]() |
5715c39cf8 | ||
![]() |
4890853010 | ||
![]() |
cf0970157d |
@@ -967,7 +967,7 @@ process_common_toolchain() {
|
||||
esac
|
||||
;;
|
||||
gcc*)
|
||||
add_cflags -m${bits}
|
||||
add_cflags -m${bits}
|
||||
add_ldflags -m${bits}
|
||||
link_with_cc=gcc
|
||||
tune_cflags="-march="
|
||||
|
@@ -680,7 +680,7 @@ int parse_coff(uint8_t *buf, size_t sz)
|
||||
uint32_t symoffset;
|
||||
|
||||
char **sectionlist; //this array holds all section names in their correct order.
|
||||
//it is used to check if the symbol is in .bss or .data section.
|
||||
//it is used to check if the symbol is in .bss or .rdata section.
|
||||
|
||||
nsections = get_le16(buf + 2);
|
||||
symtab_ptr = get_le32(buf + 8);
|
||||
@@ -725,15 +725,15 @@ int parse_coff(uint8_t *buf, size_t sz)
|
||||
}
|
||||
strcpy(sectionlist[i], sectionname);
|
||||
|
||||
if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20);
|
||||
if (!strcmp(sectionname, ".rdata")) sectionrawdata_ptr = get_le32(ptr + 20);
|
||||
|
||||
ptr += 40;
|
||||
}
|
||||
|
||||
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
|
||||
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
|
||||
//log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
|
||||
|
||||
/* The compiler puts the data with non-zero offset in .data section, but puts the data with
|
||||
/* The compiler puts the data with non-zero offset in .rdata section, but puts the data with
|
||||
zero offset in .bss section. So, if the data in in .bss section, set offset=0.
|
||||
Note from Wiki: In an object module compiled from C, the bss section contains
|
||||
the local variables (but not functions) that were declared with the static keyword,
|
||||
|
@@ -211,6 +211,8 @@ common_top() {
|
||||
$(process_forward_decls)
|
||||
|
||||
$(declare_function_pointers c $ALL_ARCHS)
|
||||
|
||||
void ${symbol:-rtcd}(void);
|
||||
EOF
|
||||
}
|
||||
|
||||
@@ -231,11 +233,10 @@ x86() {
|
||||
|
||||
cat <<EOF
|
||||
$(common_top)
|
||||
void ${symbol:-rtcd}(void);
|
||||
|
||||
#ifdef RTCD_C
|
||||
#include "vpx_ports/x86.h"
|
||||
void ${symbol:-rtcd}(void)
|
||||
static void setup_rtcd_internal(void)
|
||||
{
|
||||
int flags = x86_simd_caps();
|
||||
|
||||
@@ -261,11 +262,9 @@ arm() {
|
||||
$(common_top)
|
||||
#include "vpx_config.h"
|
||||
|
||||
void ${symbol:-rtcd}(void);
|
||||
|
||||
#ifdef RTCD_C
|
||||
#include "vpx_ports/arm.h"
|
||||
void ${symbol:-rtcd}(void)
|
||||
static void setup_rtcd_internal(void)
|
||||
{
|
||||
int flags = arm_cpu_caps();
|
||||
|
||||
@@ -285,10 +284,8 @@ unoptimized() {
|
||||
$(common_top)
|
||||
#include "vpx_config.h"
|
||||
|
||||
void ${symbol:-rtcd}(void);
|
||||
|
||||
#ifdef RTCD_C
|
||||
void ${symbol:-rtcd}(void)
|
||||
static void setup_rtcd_internal(void)
|
||||
{
|
||||
$(set_function_pointers c)
|
||||
}
|
||||
|
@@ -216,12 +216,6 @@ typedef struct macroblockd
|
||||
MODE_INFO *mode_info_context;
|
||||
int mode_info_stride;
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
MB_PREDICTION_MODE best_sse_inter_mode;
|
||||
int_mv best_sse_mv;
|
||||
unsigned char need_to_clamp_best_mvs;
|
||||
#endif
|
||||
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int up_available;
|
||||
|
@@ -83,57 +83,6 @@ static int get_cpu_count()
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
static void once(void (*func)(void))
|
||||
{
|
||||
static pthread_once_t lock = PTHREAD_ONCE_INIT;
|
||||
pthread_once(&lock, func);
|
||||
}
|
||||
|
||||
|
||||
#elif defined(_WIN32)
|
||||
static void once(void (*func)(void))
|
||||
{
|
||||
/* Using a static initializer here rather than InitializeCriticalSection()
|
||||
* since there's no race-free context in which to execute it. Protecting
|
||||
* it with an atomic op like InterlockedCompareExchangePointer introduces
|
||||
* an x86 dependency, and InitOnceExecuteOnce requires Vista.
|
||||
*/
|
||||
static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0};
|
||||
static int done;
|
||||
|
||||
EnterCriticalSection(&lock);
|
||||
|
||||
if (!done)
|
||||
{
|
||||
func();
|
||||
done = 1;
|
||||
}
|
||||
|
||||
LeaveCriticalSection(&lock);
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
|
||||
* so as long as your platform provides atomic loads/stores of pointers
|
||||
* no synchronization is strictly necessary.
|
||||
*/
|
||||
|
||||
static void once(void (*func)(void))
|
||||
{
|
||||
static int done;
|
||||
|
||||
if(!done)
|
||||
{
|
||||
func();
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
@@ -145,6 +94,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
ctx->cpu_caps = x86_simd_caps();
|
||||
#endif
|
||||
|
||||
once(vpx_rtcd);
|
||||
}
|
||||
|
@@ -196,18 +196,14 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd
|
||||
)
|
||||
void vp8_loop_filter_frame(VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int frame_type)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_rows = cm->mb_rows;
|
||||
|
@@ -76,7 +76,8 @@ void vp8_loop_filter_frame_init(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd);
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
int frame_type);
|
||||
|
||||
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
|
@@ -106,7 +106,7 @@ extern "C"
|
||||
int Width; // width of data passed to the compressor
|
||||
int Height; // height of data passed to the compressor
|
||||
struct vpx_rational timebase;
|
||||
int target_bandwidth; // bandwidth to be used in kilobits per second
|
||||
unsigned int target_bandwidth; // bandwidth to be used in kilobits per second
|
||||
|
||||
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
|
||||
int Sharpness; // parameter used for sharpening output: recommendation 0:
|
||||
|
@@ -10,3 +10,60 @@
|
||||
#include "vpx_config.h"
|
||||
#define RTCD_C
|
||||
#include "vpx_rtcd.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD && HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
static void once(void (*func)(void))
|
||||
{
|
||||
static pthread_once_t lock = PTHREAD_ONCE_INIT;
|
||||
pthread_once(&lock, func);
|
||||
}
|
||||
|
||||
|
||||
#elif CONFIG_MULTITHREAD && defined(_WIN32)
|
||||
#include <windows.h>
|
||||
static void once(void (*func)(void))
|
||||
{
|
||||
/* Using a static initializer here rather than InitializeCriticalSection()
|
||||
* since there's no race-free context in which to execute it. Protecting
|
||||
* it with an atomic op like InterlockedCompareExchangePointer introduces
|
||||
* an x86 dependency, and InitOnceExecuteOnce requires Vista.
|
||||
*/
|
||||
static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0};
|
||||
static int done;
|
||||
|
||||
EnterCriticalSection(&lock);
|
||||
|
||||
if (!done)
|
||||
{
|
||||
func();
|
||||
done = 1;
|
||||
}
|
||||
|
||||
LeaveCriticalSection(&lock);
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
|
||||
* so as long as your platform provides atomic loads/stores of pointers
|
||||
* no synchronization is strictly necessary.
|
||||
*/
|
||||
|
||||
static void once(void (*func)(void))
|
||||
{
|
||||
static int done;
|
||||
|
||||
if(!done)
|
||||
{
|
||||
func();
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void vpx_rtcd()
|
||||
{
|
||||
once(setup_rtcd_internal);
|
||||
}
|
||||
|
@@ -501,6 +501,14 @@ fi
|
||||
prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
|
||||
specialize vp8_yv12_copy_partial_frame neon
|
||||
|
||||
#
|
||||
# Denoiser filter
|
||||
#
|
||||
if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
|
||||
prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
|
||||
specialize vp8_denoiser_filter sse2
|
||||
fi
|
||||
|
||||
# End of encoder only functions
|
||||
fi
|
||||
|
||||
|
@@ -13,7 +13,7 @@
|
||||
|
||||
|
||||
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
|
||||
global sym(vp8_dequantize_b_impl_mmx)
|
||||
global sym(vp8_dequantize_b_impl_mmx) PRIVATE
|
||||
sym(vp8_dequantize_b_impl_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -55,7 +55,7 @@ sym(vp8_dequantize_b_impl_mmx):
|
||||
;short *dq, 1
|
||||
;unsigned char *dest, 2
|
||||
;int stride) 3
|
||||
global sym(vp8_dequant_idct_add_mmx)
|
||||
global sym(vp8_dequant_idct_add_mmx) PRIVATE
|
||||
sym(vp8_dequant_idct_add_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -34,7 +34,7 @@
|
||||
|
||||
;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
|
||||
;int pitch, unsigned char *dest,int stride)
|
||||
global sym(vp8_short_idct4x4llm_mmx)
|
||||
global sym(vp8_short_idct4x4llm_mmx) PRIVATE
|
||||
sym(vp8_short_idct4x4llm_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -224,7 +224,7 @@ sym(vp8_short_idct4x4llm_mmx):
|
||||
;int pred_stride,
|
||||
;unsigned char *dst_ptr,
|
||||
;int stride)
|
||||
global sym(vp8_dc_only_idct_add_mmx)
|
||||
global sym(vp8_dc_only_idct_add_mmx) PRIVATE
|
||||
sym(vp8_dc_only_idct_add_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -19,7 +19,7 @@
|
||||
; int dst_stride - 3
|
||||
; )
|
||||
|
||||
global sym(vp8_idct_dequant_0_2x_sse2)
|
||||
global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_0_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -101,7 +101,7 @@ sym(vp8_idct_dequant_0_2x_sse2):
|
||||
; unsigned char *dst - 2
|
||||
; int dst_stride - 3
|
||||
; )
|
||||
global sym(vp8_idct_dequant_full_2x_sse2)
|
||||
global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_full_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -358,7 +358,7 @@ sym(vp8_idct_dequant_full_2x_sse2):
|
||||
; int dst_stride - 3
|
||||
; short *dc - 4
|
||||
; )
|
||||
global sym(vp8_idct_dequant_dc_0_2x_sse2)
|
||||
global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_dc_0_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -434,7 +434,7 @@ sym(vp8_idct_dequant_dc_0_2x_sse2):
|
||||
; int dst_stride - 3
|
||||
; short *dc - 4
|
||||
; )
|
||||
global sym(vp8_idct_dequant_dc_full_2x_sse2)
|
||||
global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_dc_full_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -12,7 +12,7 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
|
||||
global sym(vp8_short_inv_walsh4x4_mmx)
|
||||
global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE
|
||||
sym(vp8_short_inv_walsh4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -12,7 +12,7 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
|
||||
global sym(vp8_short_inv_walsh4x4_sse2)
|
||||
global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
|
||||
sym(vp8_short_inv_walsh4x4_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -133,7 +133,7 @@
|
||||
; const char *limit,
|
||||
; const char *thresh
|
||||
;)
|
||||
global sym(vp8_loop_filter_bh_y_sse2)
|
||||
global sym(vp8_loop_filter_bh_y_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_bh_y_sse2):
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
@@ -273,7 +273,7 @@ LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
|
||||
; const char *thresh
|
||||
;)
|
||||
|
||||
global sym(vp8_loop_filter_bv_y_sse2)
|
||||
global sym(vp8_loop_filter_bv_y_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_bv_y_sse2):
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
|
@@ -21,7 +21,7 @@
|
||||
; const char *thresh,
|
||||
; int count
|
||||
;)
|
||||
global sym(vp8_loop_filter_horizontal_edge_mmx)
|
||||
global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE
|
||||
sym(vp8_loop_filter_horizontal_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -233,7 +233,7 @@ sym(vp8_loop_filter_horizontal_edge_mmx):
|
||||
; const char *thresh,
|
||||
; int count
|
||||
;)
|
||||
global sym(vp8_loop_filter_vertical_edge_mmx)
|
||||
global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE
|
||||
sym(vp8_loop_filter_vertical_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -603,7 +603,7 @@ sym(vp8_loop_filter_vertical_edge_mmx):
|
||||
; const char *thresh,
|
||||
; int count
|
||||
;)
|
||||
global sym(vp8_mbloop_filter_horizontal_edge_mmx)
|
||||
global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE
|
||||
sym(vp8_mbloop_filter_horizontal_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -920,7 +920,7 @@ sym(vp8_mbloop_filter_horizontal_edge_mmx):
|
||||
; const char *thresh,
|
||||
; int count
|
||||
;)
|
||||
global sym(vp8_mbloop_filter_vertical_edge_mmx)
|
||||
global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE
|
||||
sym(vp8_mbloop_filter_vertical_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1384,7 +1384,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx):
|
||||
; int src_pixel_step,
|
||||
; const char *blimit
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE
|
||||
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1500,7 +1500,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||
; int src_pixel_step,
|
||||
; const char *blimit
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE
|
||||
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -286,7 +286,7 @@
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
;)
|
||||
global sym(vp8_loop_filter_horizontal_edge_sse2)
|
||||
global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_horizontal_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -334,7 +334,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
|
||||
; const char *thresh,
|
||||
; int count
|
||||
;)
|
||||
global sym(vp8_loop_filter_horizontal_edge_uv_sse2)
|
||||
global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -561,7 +561,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
;)
|
||||
global sym(vp8_mbloop_filter_horizontal_edge_sse2)
|
||||
global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE
|
||||
sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -607,7 +607,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
;)
|
||||
global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
|
||||
global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE
|
||||
sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -928,7 +928,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
;)
|
||||
global sym(vp8_loop_filter_vertical_edge_sse2)
|
||||
global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_vertical_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -993,7 +993,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
;)
|
||||
global sym(vp8_loop_filter_vertical_edge_uv_sse2)
|
||||
global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1142,7 +1142,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
;)
|
||||
global sym(vp8_mbloop_filter_vertical_edge_sse2)
|
||||
global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE
|
||||
sym(vp8_mbloop_filter_vertical_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1209,7 +1209,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
;)
|
||||
global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
|
||||
global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE
|
||||
sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1269,7 +1269,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
||||
; int src_pixel_step,
|
||||
; const char *blimit,
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1374,7 +1374,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
; int src_pixel_step,
|
||||
; const char *blimit,
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
||||
push rbp ; save old base pointer value.
|
||||
mov rbp, rsp ; set new base pointer value.
|
||||
|
@@ -19,7 +19,7 @@
|
||||
; int dst_stride,
|
||||
; int src_weight
|
||||
;)
|
||||
global sym(vp8_filter_by_weight16x16_sse2)
|
||||
global sym(vp8_filter_by_weight16x16_sse2) PRIVATE
|
||||
sym(vp8_filter_by_weight16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -97,7 +97,7 @@ sym(vp8_filter_by_weight16x16_sse2):
|
||||
; int dst_stride,
|
||||
; int src_weight
|
||||
;)
|
||||
global sym(vp8_filter_by_weight8x8_sse2)
|
||||
global sym(vp8_filter_by_weight8x8_sse2) PRIVATE
|
||||
sym(vp8_filter_by_weight8x8_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -165,7 +165,7 @@ sym(vp8_filter_by_weight8x8_sse2):
|
||||
; unsigned int *variance, 4
|
||||
; unsigned int *sad, 5
|
||||
;)
|
||||
global sym(vp8_variance_and_sad_16x16_sse2)
|
||||
global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE
|
||||
sym(vp8_variance_and_sad_16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -24,7 +24,7 @@
|
||||
; int cols,
|
||||
; int flimit
|
||||
;)
|
||||
global sym(vp8_post_proc_down_and_across_mmx)
|
||||
global sym(vp8_post_proc_down_and_across_mmx) PRIVATE
|
||||
sym(vp8_post_proc_down_and_across_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -282,7 +282,7 @@ sym(vp8_post_proc_down_and_across_mmx):
|
||||
;void vp8_mbpost_proc_down_mmx(unsigned char *dst,
|
||||
; int pitch, int rows, int cols,int flimit)
|
||||
extern sym(vp8_rv)
|
||||
global sym(vp8_mbpost_proc_down_mmx)
|
||||
global sym(vp8_mbpost_proc_down_mmx) PRIVATE
|
||||
sym(vp8_mbpost_proc_down_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -510,7 +510,7 @@ sym(vp8_mbpost_proc_down_mmx):
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int Width, unsigned int Height, int Pitch)
|
||||
extern sym(rand)
|
||||
global sym(vp8_plane_add_noise_mmx)
|
||||
global sym(vp8_plane_add_noise_mmx) PRIVATE
|
||||
sym(vp8_plane_add_noise_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -21,7 +21,7 @@
|
||||
; int cols,
|
||||
; int flimit
|
||||
;)
|
||||
global sym(vp8_post_proc_down_and_across_xmm)
|
||||
global sym(vp8_post_proc_down_and_across_xmm) PRIVATE
|
||||
sym(vp8_post_proc_down_and_across_xmm):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -269,7 +269,7 @@ sym(vp8_post_proc_down_and_across_xmm):
|
||||
;void vp8_mbpost_proc_down_xmm(unsigned char *dst,
|
||||
; int pitch, int rows, int cols,int flimit)
|
||||
extern sym(vp8_rv)
|
||||
global sym(vp8_mbpost_proc_down_xmm)
|
||||
global sym(vp8_mbpost_proc_down_xmm) PRIVATE
|
||||
sym(vp8_mbpost_proc_down_xmm):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -497,7 +497,7 @@ sym(vp8_mbpost_proc_down_xmm):
|
||||
|
||||
;void vp8_mbpost_proc_across_ip_xmm(unsigned char *src,
|
||||
; int pitch, int rows, int cols,int flimit)
|
||||
global sym(vp8_mbpost_proc_across_ip_xmm)
|
||||
global sym(vp8_mbpost_proc_across_ip_xmm) PRIVATE
|
||||
sym(vp8_mbpost_proc_across_ip_xmm):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -694,7 +694,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int Width, unsigned int Height, int Pitch)
|
||||
extern sym(rand)
|
||||
global sym(vp8_plane_add_noise_wmt)
|
||||
global sym(vp8_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp8_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -18,7 +18,7 @@
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem8x8_mmx)
|
||||
global sym(vp8_copy_mem8x8_mmx) PRIVATE
|
||||
sym(vp8_copy_mem8x8_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -81,7 +81,7 @@ sym(vp8_copy_mem8x8_mmx):
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem8x4_mmx)
|
||||
global sym(vp8_copy_mem8x4_mmx) PRIVATE
|
||||
sym(vp8_copy_mem8x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -125,7 +125,7 @@ sym(vp8_copy_mem8x4_mmx):
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem16x16_mmx)
|
||||
global sym(vp8_copy_mem16x16_mmx) PRIVATE
|
||||
sym(vp8_copy_mem16x16_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -17,7 +17,7 @@
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem16x16_sse2)
|
||||
global sym(vp8_copy_mem16x16_sse2) PRIVATE
|
||||
sym(vp8_copy_mem16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -123,7 +123,7 @@ sym(vp8_copy_mem16x16_sse2):
|
||||
; unsigned char *left,
|
||||
; int left_stride,
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dc_mmx2)
|
||||
global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dc_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -196,7 +196,7 @@ sym(vp8_intra_pred_uv_dc_mmx2):
|
||||
; unsigned char *left,
|
||||
; int left_stride,
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dctop_mmx2)
|
||||
global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dctop_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -250,7 +250,7 @@ sym(vp8_intra_pred_uv_dctop_mmx2):
|
||||
; unsigned char *left,
|
||||
; int left_stride,
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dcleft_mmx2)
|
||||
global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dcleft_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -317,7 +317,7 @@ sym(vp8_intra_pred_uv_dcleft_mmx2):
|
||||
; unsigned char *left,
|
||||
; int left_stride,
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dc128_mmx)
|
||||
global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dc128_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -357,7 +357,7 @@ sym(vp8_intra_pred_uv_dc128_mmx):
|
||||
; int left_stride,
|
||||
; )
|
||||
%macro vp8_intra_pred_uv_tm 1
|
||||
global sym(vp8_intra_pred_uv_tm_%1)
|
||||
global sym(vp8_intra_pred_uv_tm_%1) PRIVATE
|
||||
sym(vp8_intra_pred_uv_tm_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -437,7 +437,7 @@ vp8_intra_pred_uv_tm ssse3
|
||||
; unsigned char *left,
|
||||
; int left_stride,
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_ve_mmx)
|
||||
global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE
|
||||
sym(vp8_intra_pred_uv_ve_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -479,7 +479,7 @@ sym(vp8_intra_pred_uv_ve_mmx):
|
||||
; int left_stride
|
||||
; )
|
||||
%macro vp8_intra_pred_uv_ho 1
|
||||
global sym(vp8_intra_pred_uv_ho_%1)
|
||||
global sym(vp8_intra_pred_uv_ho_%1) PRIVATE
|
||||
sym(vp8_intra_pred_uv_ho_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -577,7 +577,7 @@ vp8_intra_pred_uv_ho ssse3
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_y_dc_sse2)
|
||||
global sym(vp8_intra_pred_y_dc_sse2) PRIVATE
|
||||
sym(vp8_intra_pred_y_dc_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -683,7 +683,7 @@ sym(vp8_intra_pred_y_dc_sse2):
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_y_dctop_sse2)
|
||||
global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE
|
||||
sym(vp8_intra_pred_y_dctop_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -745,7 +745,7 @@ sym(vp8_intra_pred_y_dctop_sse2):
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_y_dcleft_sse2)
|
||||
global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE
|
||||
sym(vp8_intra_pred_y_dcleft_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -838,7 +838,7 @@ sym(vp8_intra_pred_y_dcleft_sse2):
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_y_dc128_sse2)
|
||||
global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE
|
||||
sym(vp8_intra_pred_y_dc128_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -885,7 +885,7 @@ sym(vp8_intra_pred_y_dc128_sse2):
|
||||
; int left_stride
|
||||
; )
|
||||
%macro vp8_intra_pred_y_tm 1
|
||||
global sym(vp8_intra_pred_y_tm_%1)
|
||||
global sym(vp8_intra_pred_y_tm_%1) PRIVATE
|
||||
sym(vp8_intra_pred_y_tm_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -972,7 +972,7 @@ vp8_intra_pred_y_tm ssse3
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_y_ve_sse2)
|
||||
global sym(vp8_intra_pred_y_ve_sse2) PRIVATE
|
||||
sym(vp8_intra_pred_y_ve_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1020,7 +1020,7 @@ sym(vp8_intra_pred_y_ve_sse2):
|
||||
; unsigned char *left,
|
||||
; int left_stride,
|
||||
; )
|
||||
global sym(vp8_intra_pred_y_ho_sse2)
|
||||
global sym(vp8_intra_pred_y_ho_sse2) PRIVATE
|
||||
sym(vp8_intra_pred_y_ho_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -11,11 +11,11 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
global sym(vp8_sad16x16_mmx)
|
||||
global sym(vp8_sad8x16_mmx)
|
||||
global sym(vp8_sad8x8_mmx)
|
||||
global sym(vp8_sad4x4_mmx)
|
||||
global sym(vp8_sad16x8_mmx)
|
||||
global sym(vp8_sad16x16_mmx) PRIVATE
|
||||
global sym(vp8_sad8x16_mmx) PRIVATE
|
||||
global sym(vp8_sad8x8_mmx) PRIVATE
|
||||
global sym(vp8_sad4x4_mmx) PRIVATE
|
||||
global sym(vp8_sad16x8_mmx) PRIVATE
|
||||
|
||||
;unsigned int vp8_sad16x16_mmx(
|
||||
; unsigned char *src_ptr,
|
||||
|
@@ -16,7 +16,7 @@
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad16x16_wmt)
|
||||
global sym(vp8_sad16x16_wmt) PRIVATE
|
||||
sym(vp8_sad16x16_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -90,7 +90,7 @@ sym(vp8_sad16x16_wmt):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
global sym(vp8_sad8x16_wmt)
|
||||
global sym(vp8_sad8x16_wmt) PRIVATE
|
||||
sym(vp8_sad8x16_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -153,7 +153,7 @@ sym(vp8_sad8x16_wmt):
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad8x8_wmt)
|
||||
global sym(vp8_sad8x8_wmt) PRIVATE
|
||||
sym(vp8_sad8x8_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -206,7 +206,7 @@ sym(vp8_sad8x8_wmt):
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad4x4_wmt)
|
||||
global sym(vp8_sad4x4_wmt) PRIVATE
|
||||
sym(vp8_sad4x4_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -261,7 +261,7 @@ sym(vp8_sad4x4_wmt):
|
||||
; int src_stride,
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride)
|
||||
global sym(vp8_sad16x8_wmt)
|
||||
global sym(vp8_sad16x8_wmt) PRIVATE
|
||||
sym(vp8_sad16x8_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -335,7 +335,7 @@ sym(vp8_sad16x8_wmt):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse2)
|
||||
global sym(vp8_copy32xn_sse2) PRIVATE
|
||||
sym(vp8_copy32xn_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -380,7 +380,7 @@
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x3_sse3)
|
||||
global sym(vp8_sad16x16x3_sse3) PRIVATE
|
||||
sym(vp8_sad16x16x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -422,7 +422,7 @@ sym(vp8_sad16x16x3_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x3_sse3)
|
||||
global sym(vp8_sad16x8x3_sse3) PRIVATE
|
||||
sym(vp8_sad16x8x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -460,7 +460,7 @@ sym(vp8_sad16x8x3_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x16x3_sse3)
|
||||
global sym(vp8_sad8x16x3_sse3) PRIVATE
|
||||
sym(vp8_sad8x16x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -489,7 +489,7 @@ sym(vp8_sad8x16x3_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x8x3_sse3)
|
||||
global sym(vp8_sad8x8x3_sse3) PRIVATE
|
||||
sym(vp8_sad8x8x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -514,7 +514,7 @@ sym(vp8_sad8x8x3_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad4x4x3_sse3)
|
||||
global sym(vp8_sad4x4x3_sse3) PRIVATE
|
||||
sym(vp8_sad4x4x3_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -589,7 +589,7 @@ sym(vp8_sad4x4x3_sse3):
|
||||
; int ref_stride,
|
||||
; int max_sad)
|
||||
;%define lddqu movdqu
|
||||
global sym(vp8_sad16x16_sse3)
|
||||
global sym(vp8_sad16x16_sse3) PRIVATE
|
||||
sym(vp8_sad16x16_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -642,7 +642,7 @@ sym(vp8_sad16x16_sse3):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse3)
|
||||
global sym(vp8_copy32xn_sse3) PRIVATE
|
||||
sym(vp8_copy32xn_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
@@ -703,7 +703,7 @@ sym(vp8_copy32xn_sse3):
|
||||
; unsigned char *ref_ptr_base,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x4d_sse3)
|
||||
global sym(vp8_sad16x16x4d_sse3) PRIVATE
|
||||
sym(vp8_sad16x16x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
@@ -754,7 +754,7 @@ sym(vp8_sad16x16x4d_sse3):
|
||||
; unsigned char *ref_ptr_base,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x4d_sse3)
|
||||
global sym(vp8_sad16x8x4d_sse3) PRIVATE
|
||||
sym(vp8_sad16x8x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
@@ -801,7 +801,7 @@ sym(vp8_sad16x8x4d_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x16x4d_sse3)
|
||||
global sym(vp8_sad8x16x4d_sse3) PRIVATE
|
||||
sym(vp8_sad8x16x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
@@ -834,7 +834,7 @@ sym(vp8_sad8x16x4d_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad8x8x4d_sse3)
|
||||
global sym(vp8_sad8x8x4d_sse3) PRIVATE
|
||||
sym(vp8_sad8x8x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
@@ -863,7 +863,7 @@ sym(vp8_sad8x8x4d_sse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad4x4x4d_sse3)
|
||||
global sym(vp8_sad4x4x4d_sse3) PRIVATE
|
||||
sym(vp8_sad4x4x4d_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X4
|
||||
|
@@ -161,7 +161,7 @@
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array);
|
||||
global sym(vp8_sad16x16x8_sse4)
|
||||
global sym(vp8_sad16x16x8_sse4) PRIVATE
|
||||
sym(vp8_sad16x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -203,7 +203,7 @@ sym(vp8_sad16x16x8_sse4):
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad16x8x8_sse4)
|
||||
global sym(vp8_sad16x8x8_sse4) PRIVATE
|
||||
sym(vp8_sad16x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -241,7 +241,7 @@ sym(vp8_sad16x8x8_sse4):
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x8x8_sse4)
|
||||
global sym(vp8_sad8x8x8_sse4) PRIVATE
|
||||
sym(vp8_sad8x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -279,7 +279,7 @@ sym(vp8_sad8x8x8_sse4):
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x16x8_sse4)
|
||||
global sym(vp8_sad8x16x8_sse4) PRIVATE
|
||||
sym(vp8_sad8x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -320,7 +320,7 @@ sym(vp8_sad8x16x8_sse4):
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad4x4x8_sse4)
|
||||
global sym(vp8_sad4x4x8_sse4) PRIVATE
|
||||
sym(vp8_sad4x4x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -152,7 +152,7 @@
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x16x3_ssse3)
|
||||
global sym(vp8_sad16x16x3_ssse3) PRIVATE
|
||||
sym(vp8_sad16x16x3_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -265,7 +265,7 @@ sym(vp8_sad16x16x3_ssse3):
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; int *results)
|
||||
global sym(vp8_sad16x8x3_ssse3)
|
||||
global sym(vp8_sad16x8x3_ssse3) PRIVATE
|
||||
sym(vp8_sad16x8x3_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -28,7 +28,7 @@ extern sym(vp8_bilinear_filters_x86_8)
|
||||
; unsigned int output_width,
|
||||
; short * vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1d_h6_mmx)
|
||||
global sym(vp8_filter_block1d_h6_mmx) PRIVATE
|
||||
sym(vp8_filter_block1d_h6_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -125,7 +125,7 @@ sym(vp8_filter_block1d_h6_mmx):
|
||||
; unsigned int output_width,
|
||||
; short * vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1dc_v6_mmx)
|
||||
global sym(vp8_filter_block1dc_v6_mmx) PRIVATE
|
||||
sym(vp8_filter_block1dc_v6_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -213,7 +213,7 @@ sym(vp8_filter_block1dc_v6_mmx):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict8x8_mmx)
|
||||
global sym(vp8_bilinear_predict8x8_mmx) PRIVATE
|
||||
sym(vp8_bilinear_predict8x8_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -370,7 +370,7 @@ sym(vp8_bilinear_predict8x8_mmx):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict8x4_mmx)
|
||||
global sym(vp8_bilinear_predict8x4_mmx) PRIVATE
|
||||
sym(vp8_bilinear_predict8x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -525,7 +525,7 @@ sym(vp8_bilinear_predict8x4_mmx):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict4x4_mmx)
|
||||
global sym(vp8_bilinear_predict4x4_mmx) PRIVATE
|
||||
sym(vp8_bilinear_predict4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -33,7 +33,7 @@ extern sym(vp8_bilinear_filters_x86_8)
|
||||
; unsigned int output_width,
|
||||
; short *vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1d8_h6_sse2)
|
||||
global sym(vp8_filter_block1d8_h6_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d8_h6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -153,7 +153,7 @@ sym(vp8_filter_block1d8_h6_sse2):
|
||||
; even number. This function handles 8 pixels in horizontal direction, calculating ONE
|
||||
; rows each iteration to take advantage of the 128 bits operations.
|
||||
;*************************************************************************************/
|
||||
global sym(vp8_filter_block1d16_h6_sse2)
|
||||
global sym(vp8_filter_block1d16_h6_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d16_h6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -329,7 +329,7 @@ sym(vp8_filter_block1d16_h6_sse2):
|
||||
; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The
|
||||
; input pixel array has output_height rows.
|
||||
;*************************************************************************************/
|
||||
global sym(vp8_filter_block1d8_v6_sse2)
|
||||
global sym(vp8_filter_block1d8_v6_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d8_v6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -424,7 +424,7 @@ sym(vp8_filter_block1d8_v6_sse2):
|
||||
; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
|
||||
; input pixel array has output_height rows.
|
||||
;*************************************************************************************/
|
||||
global sym(vp8_filter_block1d16_v6_sse2)
|
||||
global sym(vp8_filter_block1d16_v6_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d16_v6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -534,7 +534,7 @@ sym(vp8_filter_block1d16_v6_sse2):
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
; First-pass filter only when yoffset==0
|
||||
global sym(vp8_filter_block1d8_h6_only_sse2)
|
||||
global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d8_h6_only_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -647,7 +647,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
; First-pass filter only when yoffset==0
|
||||
global sym(vp8_filter_block1d16_h6_only_sse2)
|
||||
global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d16_h6_only_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -812,7 +812,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
; Second-pass filter only when xoffset==0
|
||||
global sym(vp8_filter_block1d8_v6_only_sse2)
|
||||
global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE
|
||||
sym(vp8_filter_block1d8_v6_only_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -904,7 +904,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width
|
||||
;)
|
||||
global sym(vp8_unpack_block1d16_h6_sse2)
|
||||
global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE
|
||||
sym(vp8_unpack_block1d16_h6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -963,7 +963,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
|
||||
; int dst_pitch
|
||||
;)
|
||||
extern sym(vp8_bilinear_filters_x86_8)
|
||||
global sym(vp8_bilinear_predict16x16_sse2)
|
||||
global sym(vp8_bilinear_predict16x16_sse2) PRIVATE
|
||||
sym(vp8_bilinear_predict16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1231,7 +1231,7 @@ sym(vp8_bilinear_predict16x16_sse2):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict8x8_sse2)
|
||||
global sym(vp8_bilinear_predict8x8_sse2) PRIVATE
|
||||
sym(vp8_bilinear_predict8x8_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -34,7 +34,7 @@
|
||||
; unsigned int output_height,
|
||||
; unsigned int vp8_filter_index
|
||||
;)
|
||||
global sym(vp8_filter_block1d8_h6_ssse3)
|
||||
global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE
|
||||
sym(vp8_filter_block1d8_h6_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -177,7 +177,7 @@ vp8_filter_block1d8_h4_ssse3:
|
||||
; unsigned int output_height,
|
||||
; unsigned int vp8_filter_index
|
||||
;)
|
||||
global sym(vp8_filter_block1d16_h6_ssse3)
|
||||
global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE
|
||||
sym(vp8_filter_block1d16_h6_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -284,7 +284,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
|
||||
; unsigned int output_height,
|
||||
; unsigned int vp8_filter_index
|
||||
;)
|
||||
global sym(vp8_filter_block1d4_h6_ssse3)
|
||||
global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE
|
||||
sym(vp8_filter_block1d4_h6_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -413,7 +413,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
|
||||
; unsigned int output_height,
|
||||
; unsigned int vp8_filter_index
|
||||
;)
|
||||
global sym(vp8_filter_block1d16_v6_ssse3)
|
||||
global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE
|
||||
sym(vp8_filter_block1d16_v6_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -601,7 +601,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
|
||||
; unsigned int output_height,
|
||||
; unsigned int vp8_filter_index
|
||||
;)
|
||||
global sym(vp8_filter_block1d8_v6_ssse3)
|
||||
global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE
|
||||
sym(vp8_filter_block1d8_v6_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -741,7 +741,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
|
||||
; unsigned int output_height,
|
||||
; unsigned int vp8_filter_index
|
||||
;)
|
||||
global sym(vp8_filter_block1d4_v6_ssse3)
|
||||
global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE
|
||||
sym(vp8_filter_block1d4_v6_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -880,7 +880,7 @@ sym(vp8_filter_block1d4_v6_ssse3):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict16x16_ssse3)
|
||||
global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE
|
||||
sym(vp8_bilinear_predict16x16_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1143,7 +1143,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict8x8_ssse3)
|
||||
global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE
|
||||
sym(vp8_bilinear_predict8x8_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -12,7 +12,7 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;unsigned int vp8_get_mb_ss_mmx( short *src_ptr )
|
||||
global sym(vp8_get_mb_ss_mmx)
|
||||
global sym(vp8_get_mb_ss_mmx) PRIVATE
|
||||
sym(vp8_get_mb_ss_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -72,7 +72,7 @@ sym(vp8_get_mb_ss_mmx):
|
||||
; unsigned int *SSE,
|
||||
; int *Sum
|
||||
;)
|
||||
global sym(vp8_get8x8var_mmx)
|
||||
global sym(vp8_get8x8var_mmx) PRIVATE
|
||||
sym(vp8_get8x8var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -320,7 +320,7 @@ sym(vp8_get8x8var_mmx):
|
||||
; unsigned int *SSE,
|
||||
; int *Sum
|
||||
;)
|
||||
global sym(vp8_get4x4var_mmx)
|
||||
global sym(vp8_get4x4var_mmx) PRIVATE
|
||||
sym(vp8_get4x4var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -433,7 +433,7 @@ sym(vp8_get4x4var_mmx):
|
||||
; unsigned char *ref_ptr,
|
||||
; int recon_stride
|
||||
;)
|
||||
global sym(vp8_get4x4sse_cs_mmx)
|
||||
global sym(vp8_get4x4sse_cs_mmx) PRIVATE
|
||||
sym(vp8_get4x4sse_cs_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -522,7 +522,7 @@ sym(vp8_get4x4sse_cs_mmx):
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil4x4_var_mmx)
|
||||
global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil4x4_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -667,7 +667,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx):
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil_var_mmx)
|
||||
global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
|
||||
sym(vp8_filter_block2d_bil_var_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -17,7 +17,7 @@
|
||||
;(
|
||||
; short *src_ptr
|
||||
;)
|
||||
global sym(vp8_get_mb_ss_sse2)
|
||||
global sym(vp8_get_mb_ss_sse2) PRIVATE
|
||||
sym(vp8_get_mb_ss_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -80,7 +80,7 @@ sym(vp8_get_mb_ss_sse2):
|
||||
; unsigned int * SSE,
|
||||
; int * Sum
|
||||
;)
|
||||
global sym(vp8_get16x16var_sse2)
|
||||
global sym(vp8_get16x16var_sse2) PRIVATE
|
||||
sym(vp8_get16x16var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -224,7 +224,7 @@ sym(vp8_get16x16var_sse2):
|
||||
; unsigned int * SSE,
|
||||
; int * Sum
|
||||
;)
|
||||
global sym(vp8_get8x8var_sse2)
|
||||
global sym(vp8_get8x8var_sse2) PRIVATE
|
||||
sym(vp8_get8x8var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -413,7 +413,7 @@ sym(vp8_get8x8var_sse2):
|
||||
; unsigned int *sumsquared;;
|
||||
;
|
||||
;)
|
||||
global sym(vp8_filter_block2d_bil_var_sse2)
|
||||
global sym(vp8_filter_block2d_bil_var_sse2) PRIVATE
|
||||
sym(vp8_filter_block2d_bil_var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -690,7 +690,7 @@ filter_block2d_bil_variance:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_vert_variance8x_h_sse2)
|
||||
global sym(vp8_half_horiz_vert_variance8x_h_sse2) PRIVATE
|
||||
sym(vp8_half_horiz_vert_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -812,7 +812,7 @@ vp8_half_horiz_vert_variance8x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2) PRIVATE
|
||||
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -928,7 +928,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_vert_variance8x_h_sse2)
|
||||
global sym(vp8_half_vert_variance8x_h_sse2) PRIVATE
|
||||
sym(vp8_half_vert_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1035,7 +1035,7 @@ vp8_half_vert_variance8x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_vert_variance16x_h_sse2)
|
||||
global sym(vp8_half_vert_variance16x_h_sse2) PRIVATE
|
||||
sym(vp8_half_vert_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1143,7 +1143,7 @@ vp8_half_vert_variance16x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance8x_h_sse2)
|
||||
global sym(vp8_half_horiz_variance8x_h_sse2) PRIVATE
|
||||
sym(vp8_half_horiz_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -1248,7 +1248,7 @@ vp8_half_horiz_variance8x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance16x_h_sse2)
|
||||
global sym(vp8_half_horiz_variance16x_h_sse2) PRIVATE
|
||||
sym(vp8_half_horiz_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -29,7 +29,7 @@
|
||||
;)
|
||||
;Note: The filter coefficient at offset=0 is 128. Since the second register
|
||||
;for Pmaddubsw is signed bytes, we must calculate zero offset seperately.
|
||||
global sym(vp8_filter_block2d_bil_var_ssse3)
|
||||
global sym(vp8_filter_block2d_bil_var_ssse3) PRIVATE
|
||||
sym(vp8_filter_block2d_bil_var_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -549,8 +549,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
|
||||
{
|
||||
vp8_reader *bool_decoder = &pbi->bc2;
|
||||
unsigned int partition_idx;
|
||||
int fragment_idx;
|
||||
int num_token_partitions;
|
||||
unsigned int fragment_idx;
|
||||
unsigned int num_token_partitions;
|
||||
const unsigned char *first_fragment_end = pbi->fragments[0] +
|
||||
pbi->fragment_sizes[0];
|
||||
|
||||
@@ -1132,7 +1132,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
|
||||
{
|
||||
int i;
|
||||
unsigned int i;
|
||||
vp8mt_decode_mb_rows(pbi, xd);
|
||||
vp8_yv12_extend_frame_borders(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/
|
||||
for (i = 0; i < pbi->decoding_thread_count; ++i)
|
||||
|
@@ -300,7 +300,7 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
|
||||
if (pbi->num_fragments == 0)
|
||||
{
|
||||
/* New frame, reset fragment pointers and sizes */
|
||||
vpx_memset(pbi->fragments, 0, sizeof(pbi->fragments));
|
||||
vpx_memset((void*)pbi->fragments, 0, sizeof(pbi->fragments));
|
||||
vpx_memset(pbi->fragment_sizes, 0, sizeof(pbi->fragment_sizes));
|
||||
}
|
||||
if (pbi->input_fragments && !(source == NULL && size == 0))
|
||||
@@ -468,7 +468,7 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi
|
||||
if(cm->filter_level)
|
||||
{
|
||||
/* Apply the loop filter if appropriate. */
|
||||
vp8_loop_filter_frame(cm, &pbi->mb);
|
||||
vp8_loop_filter_frame(cm, &pbi->mb, cm->frame_type);
|
||||
}
|
||||
vp8_yv12_extend_frame_borders(cm->frame_to_show);
|
||||
}
|
||||
|
@@ -62,7 +62,7 @@ typedef struct VP8D_COMP
|
||||
volatile int b_multithreaded_rd;
|
||||
int max_threads;
|
||||
int current_mb_col_main;
|
||||
int decoding_thread_count;
|
||||
unsigned int decoding_thread_count;
|
||||
int allocated_decoding_thread_count;
|
||||
|
||||
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
|
@@ -667,7 +667,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
||||
void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
||||
{
|
||||
int core_count = 0;
|
||||
int ithread;
|
||||
unsigned int ithread;
|
||||
|
||||
pbi->b_multithreaded_rd = 0;
|
||||
pbi->allocated_decoding_thread_count = 0;
|
||||
@@ -881,7 +881,8 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
{
|
||||
VP8_COMMON *pc = &pbi->common;
|
||||
int i;
|
||||
unsigned int i;
|
||||
int j;
|
||||
|
||||
int filter_level = pc->filter_level;
|
||||
|
||||
@@ -892,19 +893,19 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
|
||||
vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
|
||||
|
||||
for (i=1; i<pc->mb_rows; i++)
|
||||
for (j=1; j<pc->mb_rows; j++)
|
||||
{
|
||||
vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
|
||||
}
|
||||
|
||||
/* Set left_col to 129 initially */
|
||||
for (i=0; i<pc->mb_rows; i++)
|
||||
for (j=0; j<pc->mb_rows; j++)
|
||||
{
|
||||
vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
|
||||
vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
|
||||
vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
|
||||
vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
|
||||
vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
|
||||
vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
|
||||
}
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
|
@@ -107,7 +107,7 @@ typedef struct macroblock
|
||||
|
||||
int skip;
|
||||
|
||||
int encode_breakout;
|
||||
unsigned int encode_breakout;
|
||||
|
||||
//char * gf_active_ptr;
|
||||
signed char *gf_active_ptr;
|
||||
@@ -119,6 +119,16 @@ typedef struct macroblock
|
||||
int optimize;
|
||||
int q_index;
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
MB_PREDICTION_MODE best_sse_inter_mode;
|
||||
int_mv best_sse_mv;
|
||||
MV_REFERENCE_FRAME best_reference_frame;
|
||||
MV_REFERENCE_FRAME best_zeromv_reference_frame;
|
||||
unsigned char need_to_clamp_best_mvs;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
void (*short_fdct4x4)(short *input, short *output, int pitch);
|
||||
void (*short_fdct8x4)(short *input, short *output, int pitch);
|
||||
void (*short_walsh4x4)(short *input, short *output, int pitch);
|
||||
|
@@ -15,198 +15,319 @@
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_rtcd.h"
|
||||
|
||||
static const unsigned int NOISE_MOTION_THRESHOLD = 20*20;
|
||||
static const unsigned int NOISE_DIFF2_THRESHOLD = 75;
|
||||
static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
|
||||
// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
|
||||
static const unsigned int SSE_DIFF_THRESHOLD = 16*16*20;
|
||||
static const unsigned int SSE_THRESHOLD = 16*16*40;
|
||||
static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
|
||||
static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
|
||||
|
||||
static uint8_t blend(uint8_t state, uint8_t sample, uint8_t factor_q8)
|
||||
{
|
||||
return (uint8_t)(
|
||||
(((uint16_t)factor_q8 * ((uint16_t)state) + // Q8
|
||||
(uint16_t)(256 - factor_q8) * ((uint16_t)sample)) + 128) // Q8
|
||||
>> 8);
|
||||
}
|
||||
// The filtering coefficients used for denoizing are adjusted for static
|
||||
// blocks, or blocks with very small motion vectors. This is done through
|
||||
// the motion magnitude parameter.
|
||||
//
|
||||
// There are currently 2048 possible mapping from absolute difference to
|
||||
// filter coefficient depending on the motion magnitude. Each mapping is
|
||||
// in a LUT table. All these tables are staticly allocated but they are only
|
||||
// filled on their first use.
|
||||
//
|
||||
// Each entry is a pair of 16b values, the coefficient and its complement
|
||||
// to 256. Each of these value should only be 8b but they are 16b wide to
|
||||
// avoid slow partial register manipulations.
|
||||
enum {num_motion_magnitude_adjustments = 2048};
|
||||
|
||||
static unsigned int denoiser_motion_compensate(YV12_BUFFER_CONFIG* src,
|
||||
YV12_BUFFER_CONFIG* dst,
|
||||
MACROBLOCK* x,
|
||||
unsigned int best_sse,
|
||||
unsigned int zero_mv_sse,
|
||||
int recon_yoffset,
|
||||
int recon_uvoffset)
|
||||
{
|
||||
MACROBLOCKD filter_xd = x->e_mbd;
|
||||
int mv_col;
|
||||
int mv_row;
|
||||
int sse_diff = zero_mv_sse - best_sse;
|
||||
// Compensate the running average.
|
||||
filter_xd.pre.y_buffer = src->y_buffer + recon_yoffset;
|
||||
filter_xd.pre.u_buffer = src->u_buffer + recon_uvoffset;
|
||||
filter_xd.pre.v_buffer = src->v_buffer + recon_uvoffset;
|
||||
// Write the compensated running average to the destination buffer.
|
||||
filter_xd.dst.y_buffer = dst->y_buffer + recon_yoffset;
|
||||
filter_xd.dst.u_buffer = dst->u_buffer + recon_uvoffset;
|
||||
filter_xd.dst.v_buffer = dst->v_buffer + recon_uvoffset;
|
||||
// Use the best MV for the compensation.
|
||||
filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
|
||||
filter_xd.mode_info_context->mbmi.mode = filter_xd.best_sse_inter_mode;
|
||||
filter_xd.mode_info_context->mbmi.mv = filter_xd.best_sse_mv;
|
||||
filter_xd.mode_info_context->mbmi.need_to_clamp_mvs =
|
||||
filter_xd.need_to_clamp_best_mvs;
|
||||
mv_col = filter_xd.best_sse_mv.as_mv.col;
|
||||
mv_row = filter_xd.best_sse_mv.as_mv.row;
|
||||
if (filter_xd.mode_info_context->mbmi.mode <= B_PRED ||
|
||||
(mv_row*mv_row + mv_col*mv_col <= NOISE_MOTION_THRESHOLD &&
|
||||
sse_diff < SSE_DIFF_THRESHOLD))
|
||||
{
|
||||
// Handle intra blocks as referring to last frame with zero motion and
|
||||
// let the absolute pixel difference affect the filter factor.
|
||||
// Also consider small amount of motion as being random walk due to noise,
|
||||
// if it doesn't mean that we get a much bigger error.
|
||||
// Note that any changes to the mode info only affects the denoising.
|
||||
filter_xd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
|
||||
filter_xd.mode_info_context->mbmi.mode = ZEROMV;
|
||||
filter_xd.mode_info_context->mbmi.mv.as_int = 0;
|
||||
x->e_mbd.best_sse_inter_mode = ZEROMV;
|
||||
x->e_mbd.best_sse_mv.as_int = 0;
|
||||
best_sse = zero_mv_sse;
|
||||
}
|
||||
if (!x->skip)
|
||||
{
|
||||
vp8_build_inter_predictors_mb(&filter_xd);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_build_inter16x16_predictors_mb(&filter_xd,
|
||||
filter_xd.dst.y_buffer,
|
||||
filter_xd.dst.u_buffer,
|
||||
filter_xd.dst.v_buffer,
|
||||
filter_xd.dst.y_stride,
|
||||
filter_xd.dst.uv_stride);
|
||||
}
|
||||
return best_sse;
|
||||
}
|
||||
static union coeff_pair filter_coeff_LUT[num_motion_magnitude_adjustments][256];
|
||||
static uint8_t filter_coeff_LUT_initialized[num_motion_magnitude_adjustments] =
|
||||
{ 0 };
|
||||
|
||||
static void denoiser_filter(YV12_BUFFER_CONFIG* mc_running_avg,
|
||||
YV12_BUFFER_CONFIG* running_avg,
|
||||
MACROBLOCK* signal,
|
||||
unsigned int motion_magnitude2,
|
||||
int y_offset,
|
||||
int uv_offset)
|
||||
|
||||
union coeff_pair *vp8_get_filter_coeff_LUT(unsigned int motion_magnitude)
|
||||
{
|
||||
unsigned char* sig = signal->thismb;
|
||||
int sig_stride = 16;
|
||||
unsigned char* mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
|
||||
int mc_avg_y_stride = mc_running_avg->y_stride;
|
||||
unsigned char* running_avg_y = running_avg->y_buffer + y_offset;
|
||||
int avg_y_stride = running_avg->y_stride;
|
||||
int r, c;
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
for (c = 0; c < 16; c++)
|
||||
union coeff_pair *LUT;
|
||||
unsigned int motion_magnitude_adjustment = motion_magnitude >> 3;
|
||||
|
||||
if (motion_magnitude_adjustment >= num_motion_magnitude_adjustments)
|
||||
{
|
||||
int diff;
|
||||
int absdiff = 0;
|
||||
unsigned int filter_coefficient;
|
||||
absdiff = sig[c] - mc_running_avg_y[c];
|
||||
absdiff = absdiff > 0 ? absdiff : -absdiff;
|
||||
assert(absdiff >= 0 && absdiff < 256);
|
||||
filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
|
||||
// Allow some additional filtering of static blocks, or blocks with very
|
||||
// small motion vectors.
|
||||
filter_coefficient += filter_coefficient / (3 + (motion_magnitude2 >> 3));
|
||||
filter_coefficient = filter_coefficient > 255 ? 255 : filter_coefficient;
|
||||
|
||||
running_avg_y[c] = blend(mc_running_avg_y[c], sig[c], filter_coefficient);
|
||||
diff = sig[c] - running_avg_y[c];
|
||||
|
||||
if (diff * diff < NOISE_DIFF2_THRESHOLD)
|
||||
{
|
||||
// Replace with mean to suppress the noise.
|
||||
sig[c] = running_avg_y[c];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Replace the filter state with the signal since the change in this
|
||||
// pixel isn't classified as noise.
|
||||
running_avg_y[c] = sig[c];
|
||||
}
|
||||
motion_magnitude_adjustment = num_motion_magnitude_adjustments - 1;
|
||||
}
|
||||
sig += sig_stride;
|
||||
mc_running_avg_y += mc_avg_y_stride;
|
||||
running_avg_y += avg_y_stride;
|
||||
}
|
||||
|
||||
LUT = filter_coeff_LUT[motion_magnitude_adjustment];
|
||||
|
||||
if (!filter_coeff_LUT_initialized[motion_magnitude_adjustment])
|
||||
{
|
||||
int absdiff;
|
||||
|
||||
for (absdiff = 0; absdiff < 256; ++absdiff)
|
||||
{
|
||||
unsigned int filter_coefficient;
|
||||
filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
|
||||
filter_coefficient += filter_coefficient /
|
||||
(3 + motion_magnitude_adjustment);
|
||||
|
||||
if (filter_coefficient > 255)
|
||||
{
|
||||
filter_coefficient = 255;
|
||||
}
|
||||
|
||||
LUT[absdiff].as_short[0] = filter_coefficient ;
|
||||
LUT[absdiff].as_short[1] = 256 - filter_coefficient;
|
||||
}
|
||||
|
||||
filter_coeff_LUT_initialized[motion_magnitude_adjustment] = 1;
|
||||
}
|
||||
|
||||
return LUT;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
|
||||
YV12_BUFFER_CONFIG *running_avg,
|
||||
MACROBLOCK *signal,
|
||||
unsigned int motion_magnitude,
|
||||
int y_offset,
|
||||
int uv_offset)
|
||||
{
|
||||
unsigned char filtered_buf[16*16];
|
||||
unsigned char *filtered = filtered_buf;
|
||||
unsigned char *sig = signal->thismb;
|
||||
int sig_stride = 16;
|
||||
unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
|
||||
int mc_avg_y_stride = mc_running_avg->y_stride;
|
||||
unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
|
||||
int avg_y_stride = running_avg->y_stride;
|
||||
const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
|
||||
int r, c;
|
||||
int sum_diff = 0;
|
||||
|
||||
for (r = 0; r < 16; ++r)
|
||||
{
|
||||
// Calculate absolute differences
|
||||
unsigned char abs_diff[16];
|
||||
|
||||
union coeff_pair filter_coefficient[16];
|
||||
|
||||
for (c = 0; c < 16; ++c)
|
||||
{
|
||||
int absdiff = sig[c] - mc_running_avg_y[c];
|
||||
absdiff = absdiff > 0 ? absdiff : -absdiff;
|
||||
abs_diff[c] = absdiff;
|
||||
}
|
||||
|
||||
// Use LUT to get filter coefficients (two 16b value; f and 256-f)
|
||||
for (c = 0; c < 16; ++c)
|
||||
{
|
||||
filter_coefficient[c] = LUT[abs_diff[c]];
|
||||
}
|
||||
|
||||
// Filtering...
|
||||
for (c = 0; c < 16; ++c)
|
||||
{
|
||||
const uint16_t state = (uint16_t)(mc_running_avg_y[c]);
|
||||
const uint16_t sample = (uint16_t)(sig[c]);
|
||||
|
||||
running_avg_y[c] = (filter_coefficient[c].as_short[0] * state +
|
||||
filter_coefficient[c].as_short[1] * sample + 128) >> 8;
|
||||
}
|
||||
|
||||
// Depending on the magnitude of the difference between the signal and
|
||||
// filtered version, either replace the signal by the filtered one or
|
||||
// update the filter state with the signal when the change in a pixel
|
||||
// isn't classified as noise.
|
||||
for (c = 0; c < 16; ++c)
|
||||
{
|
||||
const int diff = sig[c] - running_avg_y[c];
|
||||
sum_diff += diff;
|
||||
|
||||
if (diff * diff < NOISE_DIFF2_THRESHOLD)
|
||||
{
|
||||
filtered[c] = running_avg_y[c];
|
||||
}
|
||||
else
|
||||
{
|
||||
filtered[c] = sig[c];
|
||||
running_avg_y[c] = sig[c];
|
||||
}
|
||||
}
|
||||
|
||||
// Update pointers for next iteration.
|
||||
sig += sig_stride;
|
||||
filtered += 16;
|
||||
mc_running_avg_y += mc_avg_y_stride;
|
||||
running_avg_y += avg_y_stride;
|
||||
}
|
||||
if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
|
||||
{
|
||||
return COPY_BLOCK;
|
||||
}
|
||||
vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
|
||||
return FILTER_BLOCK;
|
||||
}
|
||||
|
||||
|
||||
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height)
|
||||
{
|
||||
assert(denoiser);
|
||||
denoiser->yv12_running_avg.flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg), width,
|
||||
height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
denoiser->yv12_mc_running_avg.flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
|
||||
height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
vpx_memset(denoiser->yv12_running_avg.buffer_alloc, 0,
|
||||
denoiser->yv12_running_avg.frame_size);
|
||||
vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
|
||||
denoiser->yv12_mc_running_avg.frame_size);
|
||||
return 0;
|
||||
int i;
|
||||
assert(denoiser);
|
||||
|
||||
/* don't need one for intra start at 1 */
|
||||
for (i = 1; i < MAX_REF_FRAMES; i++)
|
||||
{
|
||||
denoiser->yv12_running_avg[i].flags = 0;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width,
|
||||
height, VP8BORDERINPIXELS)
|
||||
< 0)
|
||||
{
|
||||
vp8_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
vpx_memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
|
||||
denoiser->yv12_running_avg[i].frame_size);
|
||||
|
||||
}
|
||||
denoiser->yv12_mc_running_avg.flags = 0;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
|
||||
height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_denoiser_free(denoiser);
|
||||
return 1;
|
||||
}
|
||||
|
||||
vpx_memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
|
||||
denoiser->yv12_mc_running_avg.frame_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vp8_denoiser_free(VP8_DENOISER *denoiser)
|
||||
{
|
||||
assert(denoiser);
|
||||
vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg);
|
||||
vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
|
||||
int i;
|
||||
assert(denoiser);
|
||||
|
||||
/* we don't have one for intra ref frame */
|
||||
for (i = 1; i < MAX_REF_FRAMES ; i++)
|
||||
{
|
||||
vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);
|
||||
}
|
||||
vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
|
||||
}
|
||||
|
||||
|
||||
void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
MACROBLOCK *x,
|
||||
unsigned int best_sse,
|
||||
unsigned int zero_mv_sse,
|
||||
int recon_yoffset,
|
||||
int recon_uvoffset) {
|
||||
int mv_row;
|
||||
int mv_col;
|
||||
unsigned int motion_magnitude2;
|
||||
// Motion compensate the running average.
|
||||
best_sse = denoiser_motion_compensate(&denoiser->yv12_running_avg,
|
||||
&denoiser->yv12_mc_running_avg,
|
||||
x,
|
||||
best_sse,
|
||||
zero_mv_sse,
|
||||
recon_yoffset,
|
||||
recon_uvoffset);
|
||||
int recon_uvoffset)
|
||||
{
|
||||
int mv_row;
|
||||
int mv_col;
|
||||
unsigned int motion_magnitude2;
|
||||
|
||||
mv_row = x->e_mbd.best_sse_mv.as_mv.row;
|
||||
mv_col = x->e_mbd.best_sse_mv.as_mv.col;
|
||||
motion_magnitude2 = mv_row*mv_row + mv_col*mv_col;
|
||||
if (best_sse > SSE_THRESHOLD ||
|
||||
motion_magnitude2 > 8 * NOISE_MOTION_THRESHOLD)
|
||||
{
|
||||
// No filtering of this block since it differs too much from the predictor,
|
||||
// or the motion vector magnitude is considered too big.
|
||||
vp8_copy_mem16x16(x->thismb, 16,
|
||||
denoiser->yv12_running_avg.y_buffer + recon_yoffset,
|
||||
denoiser->yv12_running_avg.y_stride);
|
||||
return;
|
||||
}
|
||||
// Filter.
|
||||
denoiser_filter(&denoiser->yv12_mc_running_avg,
|
||||
&denoiser->yv12_running_avg,
|
||||
x,
|
||||
motion_magnitude2,
|
||||
recon_yoffset,
|
||||
recon_uvoffset);
|
||||
MV_REFERENCE_FRAME frame = x->best_reference_frame;
|
||||
MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
|
||||
|
||||
enum vp8_denoiser_decision decision = FILTER_BLOCK;
|
||||
|
||||
// Motion compensate the running average.
|
||||
if (zero_frame)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame];
|
||||
YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg;
|
||||
YV12_BUFFER_CONFIG saved_pre,saved_dst;
|
||||
MB_MODE_INFO saved_mbmi;
|
||||
MACROBLOCKD *filter_xd = &x->e_mbd;
|
||||
MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
|
||||
int mv_col;
|
||||
int mv_row;
|
||||
int sse_diff = zero_mv_sse - best_sse;
|
||||
|
||||
saved_mbmi = *mbmi;
|
||||
|
||||
// Use the best MV for the compensation.
|
||||
mbmi->ref_frame = x->best_reference_frame;
|
||||
mbmi->mode = x->best_sse_inter_mode;
|
||||
mbmi->mv = x->best_sse_mv;
|
||||
mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs;
|
||||
mv_col = x->best_sse_mv.as_mv.col;
|
||||
mv_row = x->best_sse_mv.as_mv.row;
|
||||
|
||||
if (frame == INTRA_FRAME ||
|
||||
(mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD &&
|
||||
sse_diff < SSE_DIFF_THRESHOLD))
|
||||
{
|
||||
// Handle intra blocks as referring to last frame with zero motion
|
||||
// and let the absolute pixel difference affect the filter factor.
|
||||
// Also consider small amount of motion as being random walk due to
|
||||
// noise, if it doesn't mean that we get a much bigger error.
|
||||
// Note that any changes to the mode info only affects the denoising.
|
||||
mbmi->ref_frame =
|
||||
x->best_zeromv_reference_frame;
|
||||
|
||||
src = &denoiser->yv12_running_avg[zero_frame];
|
||||
|
||||
mbmi->mode = ZEROMV;
|
||||
mbmi->mv.as_int = 0;
|
||||
x->best_sse_inter_mode = ZEROMV;
|
||||
x->best_sse_mv.as_int = 0;
|
||||
best_sse = zero_mv_sse;
|
||||
}
|
||||
|
||||
saved_pre = filter_xd->pre;
|
||||
saved_dst = filter_xd->dst;
|
||||
|
||||
// Compensate the running average.
|
||||
filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset;
|
||||
filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset;
|
||||
filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset;
|
||||
// Write the compensated running average to the destination buffer.
|
||||
filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset;
|
||||
filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset;
|
||||
filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset;
|
||||
|
||||
if (!x->skip)
|
||||
{
|
||||
vp8_build_inter_predictors_mb(filter_xd);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_build_inter16x16_predictors_mb(filter_xd,
|
||||
filter_xd->dst.y_buffer,
|
||||
filter_xd->dst.u_buffer,
|
||||
filter_xd->dst.v_buffer,
|
||||
filter_xd->dst.y_stride,
|
||||
filter_xd->dst.uv_stride);
|
||||
}
|
||||
filter_xd->pre = saved_pre;
|
||||
filter_xd->dst = saved_dst;
|
||||
*mbmi = saved_mbmi;
|
||||
|
||||
}
|
||||
|
||||
mv_row = x->best_sse_mv.as_mv.row;
|
||||
mv_col = x->best_sse_mv.as_mv.col;
|
||||
motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
|
||||
if (best_sse > SSE_THRESHOLD || motion_magnitude2
|
||||
> 8 * NOISE_MOTION_THRESHOLD)
|
||||
{
|
||||
decision = COPY_BLOCK;
|
||||
}
|
||||
|
||||
if (decision == FILTER_BLOCK)
|
||||
{
|
||||
// Filter.
|
||||
decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
|
||||
&denoiser->yv12_running_avg[LAST_FRAME],
|
||||
x,
|
||||
motion_magnitude2,
|
||||
recon_yoffset, recon_uvoffset);
|
||||
}
|
||||
if (decision == COPY_BLOCK)
|
||||
{
|
||||
// No filtering of this block; it differs too much from the predictor,
|
||||
// or the motion vector magnitude is considered too big.
|
||||
vp8_copy_mem16x16(
|
||||
x->thismb, 16,
|
||||
denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset,
|
||||
denoiser->yv12_running_avg[LAST_FRAME].y_stride);
|
||||
}
|
||||
}
|
||||
|
@@ -13,10 +13,19 @@
|
||||
|
||||
#include "block.h"
|
||||
|
||||
#define NOISE_DIFF2_THRESHOLD (75)
|
||||
#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
|
||||
|
||||
/* Outcome of denoising one macroblock, as returned by the denoiser filter
 * and acted on by vp8_denoiser_denoise_mb(). */
enum vp8_denoiser_decision
|
||||
{
|
||||
/* The block is not filtered: the filter returns this when the absolute sum
 * of pixel differences exceeds SUM_DIFF_THRESHOLD, and the caller also
 * selects it when the SSE or motion magnitude is too large. The caller then
 * copies the source macroblock into the LAST_FRAME running average. */
COPY_BLOCK,
|
||||
/* The filtered pixels were accepted and copied back over the source
 * macroblock (thismb). */
FILTER_BLOCK,
|
||||
};
|
||||
|
||||
typedef struct vp8_denoiser
|
||||
{
|
||||
YV12_BUFFER_CONFIG yv12_running_avg;
|
||||
YV12_BUFFER_CONFIG yv12_mc_running_avg;
|
||||
YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES];
|
||||
YV12_BUFFER_CONFIG yv12_mc_running_avg;
|
||||
} VP8_DENOISER;
|
||||
|
||||
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height);
|
||||
@@ -30,4 +39,12 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
||||
int recon_yoffset,
|
||||
int recon_uvoffset);
|
||||
|
||||
/* One entry of the filter coefficient lookup table returned by
 * vp8_get_filter_coeff_LUT(): two 16-bit coefficients that can also be read
 * as a single 32-bit word. The SSE2 filter copies as_int into a coefficient
 * array and multiplies it pairwise with _mm_madd_epi16; its comments describe
 * the two halves as f and 256-f. */
union coeff_pair
|
||||
{
|
||||
/* Both coefficients viewed as one 32-bit value. */
uint32_t as_int;
|
||||
/* The two individual 16-bit coefficients. */
uint16_t as_short[2];
|
||||
};
|
||||
|
||||
union coeff_pair *vp8_get_filter_coeff_LUT(unsigned int motion_magnitude);
|
||||
|
||||
#endif // VP8_ENCODER_DENOISING_H_
|
||||
|
@@ -1177,9 +1177,11 @@ int vp8cx_encode_inter_macroblock
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
// Reset the best sse mode/mv for each macroblock.
|
||||
x->e_mbd.best_sse_inter_mode = 0;
|
||||
x->e_mbd.best_sse_mv.as_int = 0;
|
||||
x->e_mbd.need_to_clamp_best_mvs = 0;
|
||||
x->best_reference_frame = INTRA_FRAME;
|
||||
x->best_zeromv_reference_frame = INTRA_FRAME;
|
||||
x->best_sse_inter_mode = 0;
|
||||
x->best_sse_mv.as_int = 0;
|
||||
x->need_to_clamp_best_mvs = 0;
|
||||
#endif
|
||||
|
||||
if (cpi->sf.RD)
|
||||
|
@@ -1655,6 +1655,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
||||
{
|
||||
dealloc_raw_frame_buffers(cpi);
|
||||
alloc_raw_frame_buffers(cpi);
|
||||
vp8_alloc_compressor_data(cpi);
|
||||
}
|
||||
@@ -3113,6 +3114,8 @@ static void update_reference_frames(VP8_COMMON *cm)
|
||||
|
||||
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
{
|
||||
const FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
if (cm->no_lpf)
|
||||
{
|
||||
cm->filter_level = 0;
|
||||
@@ -3130,6 +3133,11 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
else
|
||||
vp8cx_pick_filter_level(cpi->Source, cpi);
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
{
|
||||
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
|
||||
}
|
||||
|
||||
vpx_usec_timer_mark(&timer);
|
||||
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
|
||||
}
|
||||
@@ -3141,17 +3149,56 @@ void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
{
|
||||
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd);
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type);
|
||||
}
|
||||
|
||||
vp8_yv12_extend_frame_borders(cm->frame_to_show);
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
vp8_yv12_extend_frame_borders(&cpi->denoiser.yv12_running_avg);
|
||||
|
||||
|
||||
/* we shouldn't have to keep multiple copies as we know in advance which
|
||||
* buffer we should start - for now to get something up and running
|
||||
* I've chosen to copy the buffers
|
||||
*/
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
int i;
|
||||
vp8_yv12_copy_frame(
|
||||
cpi->Source,
|
||||
&cpi->denoiser.yv12_running_avg[LAST_FRAME]);
|
||||
|
||||
vp8_yv12_extend_frame_borders(
|
||||
&cpi->denoiser.yv12_running_avg[LAST_FRAME]);
|
||||
|
||||
for (i = 2; i < MAX_REF_FRAMES - 1; i++)
|
||||
vp8_yv12_copy_frame(
|
||||
cpi->Source,
|
||||
&cpi->denoiser.yv12_running_avg[i]);
|
||||
}
|
||||
else /* For non key frames */
|
||||
{
|
||||
vp8_yv12_extend_frame_borders(
|
||||
&cpi->denoiser.yv12_running_avg[LAST_FRAME]);
|
||||
|
||||
if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf)
|
||||
{
|
||||
vp8_yv12_copy_frame(
|
||||
&cpi->denoiser.yv12_running_avg[LAST_FRAME],
|
||||
&cpi->denoiser.yv12_running_avg[ALTREF_FRAME]);
|
||||
}
|
||||
if (cm->refresh_golden_frame || cm->copy_buffer_to_gf)
|
||||
{
|
||||
vp8_yv12_copy_frame(
|
||||
&cpi->denoiser.yv12_running_avg[LAST_FRAME],
|
||||
&cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
static void encode_frame_to_data_rate
|
||||
@@ -4862,7 +4909,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
|
||||
|
||||
if (cpi->oxcf.number_of_layers > 1)
|
||||
{
|
||||
int i;
|
||||
unsigned int i;
|
||||
|
||||
// Update frame rates for each layer
|
||||
for (i=0; i<cpi->oxcf.number_of_layers; i++)
|
||||
|
@@ -61,7 +61,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
|
||||
}
|
||||
|
||||
|
||||
static int get_inter_mbpred_error(MACROBLOCK *mb,
|
||||
int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
|
||||
const vp8_variance_fn_ptr_t *vfp,
|
||||
unsigned int *sse,
|
||||
int_mv this_mv)
|
||||
@@ -458,7 +458,7 @@ static void check_for_encode_breakout(unsigned int sse, MACROBLOCK* x)
|
||||
if (sse < x->encode_breakout)
|
||||
{
|
||||
// Check u and v to make sure skip is ok
|
||||
int sse2 = 0;
|
||||
unsigned int sse2 = 0;
|
||||
|
||||
sse2 = VP8_UVSSE(x);
|
||||
|
||||
@@ -486,7 +486,7 @@ static int evaluate_inter_mode(unsigned int* sse, int rate2, int* distortion2, V
|
||||
|
||||
if((this_mode != NEWMV) ||
|
||||
!(cpi->sf.half_pixel_search) || cpi->common.full_pixel==1)
|
||||
*distortion2 = get_inter_mbpred_error(x,
|
||||
*distortion2 = vp8_get_inter_mbpred_error(x,
|
||||
&cpi->fn_ptr[BLOCK_16X16],
|
||||
sse, mv);
|
||||
|
||||
@@ -523,7 +523,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
int best_mode_index = 0;
|
||||
unsigned int sse = INT_MAX, best_rd_sse = INT_MAX;
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
unsigned int zero_mv_sse = 0, best_sse = INT_MAX;
|
||||
unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX;
|
||||
#endif
|
||||
|
||||
int_mv mvp;
|
||||
@@ -964,25 +964,27 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
// Store for later use by denoiser.
|
||||
if (this_mode == ZEROMV &&
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
|
||||
{
|
||||
zero_mv_sse = sse;
|
||||
}
|
||||
|
||||
// Store the best NEWMV in x for later use in the denoiser.
|
||||
// We are restricted to the LAST_FRAME since the denoiser only keeps
|
||||
// one filter state.
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
|
||||
{
|
||||
best_sse = sse;
|
||||
x->e_mbd.best_sse_inter_mode = NEWMV;
|
||||
x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
|
||||
x->e_mbd.need_to_clamp_best_mvs =
|
||||
x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
}
|
||||
// Store for later use by denoiser.
|
||||
if (this_mode == ZEROMV && sse < zero_mv_sse )
|
||||
{
|
||||
zero_mv_sse = sse;
|
||||
x->best_zeromv_reference_frame =
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame;
|
||||
}
|
||||
|
||||
// Store the best NEWMV in x for later use in the denoiser.
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
|
||||
sse < best_sse)
|
||||
{
|
||||
best_sse = sse;
|
||||
x->best_sse_inter_mode = NEWMV;
|
||||
x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
|
||||
x->need_to_clamp_best_mvs =
|
||||
x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
x->best_reference_frame =
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1058,37 +1060,47 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
|
||||
// No best MV found.
|
||||
x->e_mbd.best_sse_inter_mode = best_mbmode.mode;
|
||||
x->e_mbd.best_sse_mv = best_mbmode.mv;
|
||||
x->e_mbd.need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
|
||||
best_sse = best_rd_sse;
|
||||
}
|
||||
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
|
||||
recon_yoffset, recon_uvoffset);
|
||||
|
||||
// Reevaluate ZEROMV after denoising.
|
||||
if (best_mbmode.ref_frame == INTRA_FRAME)
|
||||
{
|
||||
int this_rd = 0;
|
||||
rate2 = 0;
|
||||
distortion2 = 0;
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
|
||||
rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
this_mode = ZEROMV;
|
||||
rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
|
||||
x->e_mbd.mode_info_context->mbmi.mode = this_mode;
|
||||
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
|
||||
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
|
||||
this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
|
||||
|
||||
if (this_rd < best_rd || x->skip)
|
||||
if (x->best_sse_inter_mode == DC_PRED)
|
||||
{
|
||||
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
|
||||
sizeof(MB_MODE_INFO));
|
||||
// No best MV found.
|
||||
x->best_sse_inter_mode = best_mbmode.mode;
|
||||
x->best_sse_mv = best_mbmode.mv;
|
||||
x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs;
|
||||
x->best_reference_frame = best_mbmode.ref_frame;
|
||||
best_sse = best_rd_sse;
|
||||
}
|
||||
}
|
||||
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
|
||||
recon_yoffset, recon_uvoffset);
|
||||
|
||||
|
||||
// Reevaluate ZEROMV after denoising.
|
||||
if (best_mbmode.ref_frame == INTRA_FRAME &&
|
||||
x->best_zeromv_reference_frame != INTRA_FRAME)
|
||||
{
|
||||
int this_rd = 0;
|
||||
int this_ref_frame = x->best_zeromv_reference_frame;
|
||||
rate2 = x->ref_frame_cost[this_ref_frame] +
|
||||
vp8_cost_mv_ref(ZEROMV, mdcounts);
|
||||
distortion2 = 0;
|
||||
|
||||
// set up the proper prediction buffers for the frame
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
|
||||
x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
|
||||
x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
|
||||
x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
|
||||
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
|
||||
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
|
||||
this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x);
|
||||
|
||||
if (this_rd < best_rd)
|
||||
{
|
||||
vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi,
|
||||
sizeof(MB_MODE_INFO));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -20,4 +20,8 @@ extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
int mb_row, int mb_col);
|
||||
extern void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate);
|
||||
|
||||
extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
|
||||
const vp8_variance_fn_ptr_t *vfp,
|
||||
unsigned int *sse,
|
||||
int_mv this_mv);
|
||||
#endif
|
||||
|
@@ -357,7 +357,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
|
||||
{
|
||||
// boost defaults to half second
|
||||
int kf_boost;
|
||||
int target;
|
||||
unsigned int target;
|
||||
|
||||
// Clear down mmx registers to allow floating point in what follows
|
||||
vp8_clear_system_state(); //__asm emms;
|
||||
|
@@ -21,6 +21,7 @@
|
||||
#include "onyx_int.h"
|
||||
#include "modecosts.h"
|
||||
#include "encodeintra.h"
|
||||
#include "pickinter.h"
|
||||
#include "vp8/common/entropymode.h"
|
||||
#include "vp8/common/reconinter.h"
|
||||
#include "vp8/common/reconintra4x4.h"
|
||||
@@ -36,7 +37,6 @@
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
#include "denoising.h"
|
||||
#endif
|
||||
|
||||
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
|
||||
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
|
||||
@@ -1766,7 +1766,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
|
||||
{
|
||||
unsigned int sse;
|
||||
unsigned int var;
|
||||
int threshold = (xd->block[0].dequant[1]
|
||||
unsigned int threshold = (xd->block[0].dequant[1]
|
||||
* xd->block[0].dequant[1] >>4);
|
||||
|
||||
if(threshold < x->encode_breakout)
|
||||
@@ -1785,7 +1785,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
|
||||
(sse /2 > var && sse-var < 64))
|
||||
{
|
||||
// Check u and v to make sure skip is ok
|
||||
int sse2= VP8_UVSSE(x);
|
||||
unsigned int sse2 = VP8_UVSSE(x);
|
||||
if (sse2 * 2 < threshold)
|
||||
{
|
||||
x->skip = 1;
|
||||
@@ -1962,6 +1962,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
int intra_rd_penalty = 10* vp8_dc_quant(cpi->common.base_qindex,
|
||||
cpi->common.y1dc_delta_q);
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
|
||||
best_rd_sse = INT_MAX;
|
||||
#endif
|
||||
|
||||
mode_mv = mode_mv_sb[sign_bias];
|
||||
best_ref_mv.as_int = 0;
|
||||
best_mode.rd = INT_MAX;
|
||||
@@ -2372,21 +2377,38 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
best_mode.intra_rd = this_rd;
|
||||
*returnintra = rd.distortion2 ;
|
||||
}
|
||||
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
// Store the best NEWMV in x for later use in the denoiser.
|
||||
// We are restricted to the LAST_FRAME since the denoiser only keeps
|
||||
// one filter state.
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
|
||||
{
|
||||
x->e_mbd.best_sse_inter_mode = NEWMV;
|
||||
x->e_mbd.best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
|
||||
x->e_mbd.need_to_clamp_best_mvs =
|
||||
x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
}
|
||||
unsigned int sse;
|
||||
vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
|
||||
mode_mv[this_mode]);
|
||||
|
||||
if (sse < best_rd_sse)
|
||||
best_rd_sse = sse;
|
||||
|
||||
// Store for later use by denoiser.
|
||||
if (this_mode == ZEROMV && sse < zero_mv_sse )
|
||||
{
|
||||
zero_mv_sse = sse;
|
||||
x->best_zeromv_reference_frame =
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame;
|
||||
}
|
||||
|
||||
// Store the best NEWMV in x for later use in the denoiser.
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
|
||||
sse < best_sse)
|
||||
{
|
||||
best_sse = sse;
|
||||
vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
|
||||
mode_mv[this_mode]);
|
||||
x->best_sse_inter_mode = NEWMV;
|
||||
x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
|
||||
x->need_to_clamp_best_mvs =
|
||||
x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
x->best_reference_frame =
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2459,42 +2481,55 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
|
||||
#if CONFIG_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity)
|
||||
{
|
||||
if (x->e_mbd.best_sse_inter_mode == DC_PRED) {
|
||||
// No best MV found.
|
||||
x->e_mbd.best_sse_inter_mode = best_mode.mbmode.mode;
|
||||
x->e_mbd.best_sse_mv = best_mode.mbmode.mv;
|
||||
x->e_mbd.need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
|
||||
}
|
||||
|
||||
// TODO(holmer): No SSEs are calculated in rdopt.c. What else can be used?
|
||||
vp8_denoiser_denoise_mb(&cpi->denoiser, x, 0, 0,
|
||||
recon_yoffset, recon_uvoffset);
|
||||
// Reevalute ZEROMV if the current mode is INTRA.
|
||||
if (best_mode.mbmode.ref_frame == INTRA_FRAME)
|
||||
{
|
||||
int this_rd = INT_MAX;
|
||||
int disable_skip = 0;
|
||||
int other_cost = 0;
|
||||
vpx_memset(&rd, 0, sizeof(rd));
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = LAST_FRAME;
|
||||
rd.rate2 += x->ref_frame_cost[LAST_FRAME];
|
||||
rd.rate2 += vp8_cost_mv_ref(ZEROMV, mdcounts);
|
||||
x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
|
||||
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
|
||||
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
|
||||
this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
|
||||
this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
|
||||
disable_skip, uv_intra_tteob,
|
||||
intra_rd_penalty, cpi, x);
|
||||
if (this_rd < best_mode.rd || x->skip)
|
||||
if (x->best_sse_inter_mode == DC_PRED)
|
||||
{
|
||||
// Note index of best mode so far
|
||||
best_mode_index = mode_index;
|
||||
*returnrate = rd.rate2;
|
||||
*returndistortion = rd.distortion2;
|
||||
update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
|
||||
// No best MV found.
|
||||
x->best_sse_inter_mode = best_mode.mbmode.mode;
|
||||
x->best_sse_mv = best_mode.mbmode.mv;
|
||||
x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
|
||||
x->best_reference_frame = best_mode.mbmode.ref_frame;
|
||||
best_sse = best_rd_sse;
|
||||
}
|
||||
}
|
||||
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
|
||||
recon_yoffset, recon_uvoffset);
|
||||
|
||||
|
||||
// Reevaluate ZEROMV after denoising.
|
||||
if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
|
||||
x->best_zeromv_reference_frame != INTRA_FRAME)
|
||||
{
|
||||
int this_rd = INT_MAX;
|
||||
int disable_skip = 0;
|
||||
int other_cost = 0;
|
||||
int this_ref_frame = x->best_zeromv_reference_frame;
|
||||
rd.rate2 = x->ref_frame_cost[this_ref_frame] +
|
||||
vp8_cost_mv_ref(ZEROMV, mdcounts);
|
||||
rd.distortion2 = 0;
|
||||
|
||||
// set up the proper prediction buffers for the frame
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
|
||||
x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
|
||||
x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
|
||||
x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
|
||||
x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
|
||||
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
|
||||
|
||||
this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
|
||||
this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
|
||||
disable_skip, uv_intra_tteob,
|
||||
intra_rd_penalty, cpi, x);
|
||||
if (this_rd < best_mode.rd || x->skip)
|
||||
{
|
||||
// Note index of best mode so far
|
||||
best_mode_index = mode_index;
|
||||
*returnrate = rd.rate2;
|
||||
*returndistortion = rd.distortion2;
|
||||
update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -12,7 +12,7 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_mmx)
|
||||
global sym(vp8_short_fdct4x4_mmx) PRIVATE
|
||||
sym(vp8_short_fdct4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -61,7 +61,7 @@
|
||||
%endmacro
|
||||
|
||||
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_sse2)
|
||||
global sym(vp8_short_fdct4x4_sse2) PRIVATE
|
||||
sym(vp8_short_fdct4x4_sse2):
|
||||
|
||||
STACK_FRAME_CREATE
|
||||
@@ -166,7 +166,7 @@ sym(vp8_short_fdct4x4_sse2):
|
||||
STACK_FRAME_DESTROY
|
||||
|
||||
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct8x4_sse2)
|
||||
global sym(vp8_short_fdct8x4_sse2) PRIVATE
|
||||
sym(vp8_short_fdct8x4_sse2):
|
||||
|
||||
STACK_FRAME_CREATE
|
||||
|
153
vp8/encoder/x86/denoising_sse2.c
Normal file
153
vp8/encoder/x86/denoising_sse2.c
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp8/encoder/denoising.h"
|
||||
|
||||
#include "vp8/common/reconinter.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_rtcd.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
union sum_union {
|
||||
__m128i v;
|
||||
short e[8];
|
||||
};
|
||||
|
||||
int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
|
||||
YV12_BUFFER_CONFIG *running_avg,
|
||||
MACROBLOCK *signal, unsigned int motion_magnitude,
|
||||
int y_offset, int uv_offset)
|
||||
{
|
||||
unsigned char filtered_buf[16*16];
|
||||
unsigned char *filtered = filtered_buf;
|
||||
unsigned char *sig = signal->thismb;
|
||||
int sig_stride = 16;
|
||||
unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
|
||||
int mc_avg_y_stride = mc_running_avg->y_stride;
|
||||
unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
|
||||
int avg_y_stride = running_avg->y_stride;
|
||||
const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
|
||||
int r, c;
|
||||
__m128i acc_diff = { 0 };
|
||||
|
||||
for (r = 0; r < 16; ++r)
|
||||
{
|
||||
__m128i filter_coefficient_00, filter_coefficient_04;
|
||||
__m128i filter_coefficient_08, filter_coefficient_12;
|
||||
__m128i v_sig0, v_sig1;
|
||||
__m128i v_mc_running_avg_y0, v_mc_running_avg_y1;
|
||||
__m128i state0, state1, state2, state3;
|
||||
__m128i res0, res1, res2, res3;
|
||||
__m128i v_running_avg_y;
|
||||
__m128i diff0, diff1, diff0sq, diff1sq, diff_sq;
|
||||
const __m128i kNOISE_DIFF2_THRESHOLD =
|
||||
_mm_set1_epi8(NOISE_DIFF2_THRESHOLD);
|
||||
__m128i take_running, p0, p1, p2;
|
||||
const __m128i k_zero = _mm_set1_epi16(0);
|
||||
const __m128i k_128 = _mm_set1_epi32(128);
|
||||
|
||||
// Calculate absolute differences
|
||||
DECLARE_ALIGNED_ARRAY(16,unsigned char,abs_diff,16);
|
||||
DECLARE_ALIGNED_ARRAY(16,uint32_t,filter_coefficient,16);
|
||||
__m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0]));
|
||||
__m128i v_mc_running_avg_y = _mm_loadu_si128(
|
||||
(__m128i *)(&mc_running_avg_y[0]));
|
||||
__m128i a_minus_b = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
|
||||
__m128i b_minus_a = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
|
||||
__m128i v_abs_diff = _mm_adds_epu8(a_minus_b, b_minus_a);
|
||||
_mm_store_si128((__m128i *)(&abs_diff[0]), v_abs_diff);
|
||||
|
||||
// Use LUT to get filter coefficients (two 16b value; f and 256-f)
|
||||
for (c = 0; c < 16; ++c)
|
||||
{
|
||||
filter_coefficient[c] = LUT[abs_diff[c]].as_int;
|
||||
}
|
||||
|
||||
// Filtering...
|
||||
// load filter coefficients (two 16b value; f and 256-f)
|
||||
filter_coefficient_00 = _mm_load_si128(
|
||||
(__m128i *)(&filter_coefficient[ 0]));
|
||||
filter_coefficient_04 = _mm_load_si128(
|
||||
(__m128i *)(&filter_coefficient[ 4]));
|
||||
filter_coefficient_08 = _mm_load_si128(
|
||||
(__m128i *)(&filter_coefficient[ 8]));
|
||||
filter_coefficient_12 = _mm_load_si128(
|
||||
(__m128i *)(&filter_coefficient[12]));
|
||||
|
||||
// expand sig from 8b to 16b
|
||||
v_sig0 = _mm_unpacklo_epi8(v_sig, k_zero);
|
||||
v_sig1 = _mm_unpackhi_epi8(v_sig, k_zero);
|
||||
// expand mc_running_avg_y from 8b to 16b
|
||||
v_mc_running_avg_y0 = _mm_unpacklo_epi8(v_mc_running_avg_y, k_zero);
|
||||
v_mc_running_avg_y1 = _mm_unpackhi_epi8(v_mc_running_avg_y, k_zero);
|
||||
// interleave sig and mc_running_avg_y for upcoming multiply-add
|
||||
state0 = _mm_unpacklo_epi16(v_mc_running_avg_y0, v_sig0);
|
||||
state1 = _mm_unpackhi_epi16(v_mc_running_avg_y0, v_sig0);
|
||||
state2 = _mm_unpacklo_epi16(v_mc_running_avg_y1, v_sig1);
|
||||
state3 = _mm_unpackhi_epi16(v_mc_running_avg_y1, v_sig1);
|
||||
// blend values
|
||||
res0 = _mm_madd_epi16(filter_coefficient_00, state0);
|
||||
res1 = _mm_madd_epi16(filter_coefficient_04, state1);
|
||||
res2 = _mm_madd_epi16(filter_coefficient_08, state2);
|
||||
res3 = _mm_madd_epi16(filter_coefficient_12, state3);
|
||||
res0 = _mm_add_epi32(res0, k_128);
|
||||
res1 = _mm_add_epi32(res1, k_128);
|
||||
res2 = _mm_add_epi32(res2, k_128);
|
||||
res3 = _mm_add_epi32(res3, k_128);
|
||||
res0 = _mm_srai_epi32(res0, 8);
|
||||
res1 = _mm_srai_epi32(res1, 8);
|
||||
res2 = _mm_srai_epi32(res2, 8);
|
||||
res3 = _mm_srai_epi32(res3, 8);
|
||||
// combine the 32b results into a single 8b vector
|
||||
res0 = _mm_packs_epi32(res0, res1);
|
||||
res2 = _mm_packs_epi32(res2, res3);
|
||||
v_running_avg_y = _mm_packus_epi16(res0, res2);
|
||||
|
||||
// Depending on the magnitude of the difference between the signal and
|
||||
// filtered version, either replace the signal by the filtered one or
|
||||
// update the filter state with the signal when the change in a pixel
|
||||
// isn't classified as noise.
|
||||
diff0 = _mm_sub_epi16(v_sig0, res0);
|
||||
diff1 = _mm_sub_epi16(v_sig1, res2);
|
||||
acc_diff = _mm_add_epi16(acc_diff, _mm_add_epi16(diff0, diff1));
|
||||
|
||||
diff0sq = _mm_mullo_epi16(diff0, diff0);
|
||||
diff1sq = _mm_mullo_epi16(diff1, diff1);
|
||||
diff_sq = _mm_packus_epi16(diff0sq, diff1sq);
|
||||
take_running = _mm_cmplt_epi8(diff_sq, kNOISE_DIFF2_THRESHOLD);
|
||||
p0 = _mm_and_si128(take_running, v_running_avg_y);
|
||||
p1 = _mm_andnot_si128(take_running, v_sig);
|
||||
p2 = _mm_or_si128(p0, p1);
|
||||
_mm_storeu_si128((__m128i *)(&running_avg_y[0]), p2);
|
||||
_mm_storeu_si128((__m128i *)(&filtered[0]), p2);
|
||||
|
||||
// Update pointers for next iteration.
|
||||
sig += sig_stride;
|
||||
filtered += 16;
|
||||
mc_running_avg_y += mc_avg_y_stride;
|
||||
running_avg_y += avg_y_stride;
|
||||
}
|
||||
{
|
||||
// Compute the sum of all pixel differences of this MB.
|
||||
union sum_union s;
|
||||
int sum_diff;
|
||||
s.v = acc_diff;
|
||||
sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] +
|
||||
s.e[4] + s.e[5] + s.e[6] + s.e[7];
|
||||
if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
|
||||
{
|
||||
return COPY_BLOCK;
|
||||
}
|
||||
}
|
||||
vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
|
||||
return FILTER_BLOCK;
|
||||
}
|
@@ -12,7 +12,7 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr)
|
||||
global sym(vp8_block_error_xmm)
|
||||
global sym(vp8_block_error_xmm) PRIVATE
|
||||
sym(vp8_block_error_xmm):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -60,7 +60,7 @@ sym(vp8_block_error_xmm):
|
||||
ret
|
||||
|
||||
;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr)
|
||||
global sym(vp8_block_error_mmx)
|
||||
global sym(vp8_block_error_mmx) PRIVATE
|
||||
sym(vp8_block_error_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -126,7 +126,7 @@ sym(vp8_block_error_mmx):
|
||||
|
||||
|
||||
;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
|
||||
global sym(vp8_mbblock_error_mmx_impl)
|
||||
global sym(vp8_mbblock_error_mmx_impl) PRIVATE
|
||||
sym(vp8_mbblock_error_mmx_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -203,7 +203,7 @@ sym(vp8_mbblock_error_mmx_impl):
|
||||
|
||||
|
||||
;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
|
||||
global sym(vp8_mbblock_error_xmm_impl)
|
||||
global sym(vp8_mbblock_error_xmm_impl) PRIVATE
|
||||
sym(vp8_mbblock_error_xmm_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -273,7 +273,7 @@ sym(vp8_mbblock_error_xmm_impl):
|
||||
|
||||
|
||||
;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
|
||||
global sym(vp8_mbuverror_mmx_impl)
|
||||
global sym(vp8_mbuverror_mmx_impl) PRIVATE
|
||||
sym(vp8_mbuverror_mmx_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -330,7 +330,7 @@ sym(vp8_mbuverror_mmx_impl):
|
||||
|
||||
|
||||
;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
|
||||
global sym(vp8_mbuverror_xmm_impl)
|
||||
global sym(vp8_mbuverror_xmm_impl) PRIVATE
|
||||
sym(vp8_mbuverror_xmm_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -12,7 +12,7 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_walsh4x4_sse2)
|
||||
global sym(vp8_short_walsh4x4_sse2) PRIVATE
|
||||
sym(vp8_short_walsh4x4_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -15,7 +15,7 @@
|
||||
; short *qcoeff_ptr,short *dequant_ptr,
|
||||
; short *scan_mask, short *round_ptr,
|
||||
; short *quant_ptr, short *dqcoeff_ptr);
|
||||
global sym(vp8_fast_quantize_b_impl_mmx)
|
||||
global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
|
||||
sym(vp8_fast_quantize_b_impl_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -16,7 +16,7 @@
|
||||
; (BLOCK *b, | 0
|
||||
; BLOCKD *d) | 1
|
||||
|
||||
global sym(vp8_regular_quantize_b_sse2)
|
||||
global sym(vp8_regular_quantize_b_sse2) PRIVATE
|
||||
sym(vp8_regular_quantize_b_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15
|
||||
; (BLOCK *b, | 0
|
||||
; BLOCKD *d) | 1
|
||||
|
||||
global sym(vp8_fast_quantize_b_sse2)
|
||||
global sym(vp8_fast_quantize_b_sse2) PRIVATE
|
||||
sym(vp8_fast_quantize_b_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -16,7 +16,7 @@
|
||||
; (BLOCK *b, | 0
|
||||
; BLOCKD *d) | 1
|
||||
|
||||
global sym(vp8_regular_quantize_b_sse4)
|
||||
global sym(vp8_regular_quantize_b_sse4) PRIVATE
|
||||
sym(vp8_regular_quantize_b_sse4):
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
|
@@ -17,7 +17,7 @@
|
||||
; BLOCKD *d) | 1
|
||||
;
|
||||
|
||||
global sym(vp8_fast_quantize_b_ssse3)
|
||||
global sym(vp8_fast_quantize_b_ssse3) PRIVATE
|
||||
sym(vp8_fast_quantize_b_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -61,7 +61,7 @@
|
||||
; or pavgb At this point this is just meant to be first pass for calculating
|
||||
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
|
||||
; in mode selection code.
|
||||
global sym(vp8_ssim_parms_16x16_sse2)
|
||||
global sym(vp8_ssim_parms_16x16_sse2) PRIVATE
|
||||
sym(vp8_ssim_parms_16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -151,7 +151,7 @@ sym(vp8_ssim_parms_16x16_sse2):
|
||||
; or pavgb At this point this is just meant to be first pass for calculating
|
||||
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
|
||||
; in mode selection code.
|
||||
global sym(vp8_ssim_parms_8x8_sse2)
|
||||
global sym(vp8_ssim_parms_8x8_sse2) PRIVATE
|
||||
sym(vp8_ssim_parms_8x8_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -14,7 +14,7 @@
|
||||
;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
|
||||
; short *diff, unsigned char *Predictor,
|
||||
; int pitch);
|
||||
global sym(vp8_subtract_b_mmx_impl)
|
||||
global sym(vp8_subtract_b_mmx_impl) PRIVATE
|
||||
sym(vp8_subtract_b_mmx_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -75,7 +75,7 @@ sym(vp8_subtract_b_mmx_impl):
|
||||
|
||||
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
|
||||
;unsigned char *pred, int pred_stride)
|
||||
global sym(vp8_subtract_mby_mmx)
|
||||
global sym(vp8_subtract_mby_mmx) PRIVATE
|
||||
sym(vp8_subtract_mby_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -150,7 +150,7 @@ sym(vp8_subtract_mby_mmx):
|
||||
; int src_stride, unsigned char *upred,
|
||||
; unsigned char *vpred, int pred_stride)
|
||||
|
||||
global sym(vp8_subtract_mbuv_mmx)
|
||||
global sym(vp8_subtract_mbuv_mmx) PRIVATE
|
||||
sym(vp8_subtract_mbuv_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -14,7 +14,7 @@
|
||||
;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
|
||||
; short *diff, unsigned char *Predictor,
|
||||
; int pitch);
|
||||
global sym(vp8_subtract_b_sse2_impl)
|
||||
global sym(vp8_subtract_b_sse2_impl) PRIVATE
|
||||
sym(vp8_subtract_b_sse2_impl):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -73,7 +73,7 @@ sym(vp8_subtract_b_sse2_impl):
|
||||
|
||||
;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
|
||||
;unsigned char *pred, int pred_stride)
|
||||
global sym(vp8_subtract_mby_sse2)
|
||||
global sym(vp8_subtract_mby_sse2) PRIVATE
|
||||
sym(vp8_subtract_mby_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
@@ -146,7 +146,7 @@ sym(vp8_subtract_mby_sse2):
|
||||
;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
|
||||
; int src_stride, unsigned char *upred,
|
||||
; unsigned char *vpred, int pred_stride)
|
||||
global sym(vp8_subtract_mbuv_sse2)
|
||||
global sym(vp8_subtract_mbuv_sse2) PRIVATE
|
||||
sym(vp8_subtract_mbuv_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
@@ -20,7 +20,7 @@
|
||||
; int filter_weight, | 5
|
||||
; unsigned int *accumulator, | 6
|
||||
; unsigned short *count) | 7
|
||||
global sym(vp8_temporal_filter_apply_sse2)
|
||||
global sym(vp8_temporal_filter_apply_sse2) PRIVATE
|
||||
sym(vp8_temporal_filter_apply_sse2):
|
||||
|
||||
push rbp
|
||||
|
@@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_rtcd.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vpx_version.h"
|
||||
@@ -227,7 +228,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
|
||||
if (cfg->ts_number_layers > 1)
|
||||
{
|
||||
int i;
|
||||
unsigned int i;
|
||||
RANGE_CHECK_HI(cfg, ts_periodicity, 16);
|
||||
|
||||
for (i=1; i<cfg->ts_number_layers; i++)
|
||||
@@ -568,6 +569,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
|
||||
|
||||
struct VP8_COMP *optr;
|
||||
|
||||
vpx_rtcd();
|
||||
|
||||
if (!ctx->priv)
|
||||
{
|
||||
priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
|
||||
|
@@ -11,6 +11,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vpx_rtcd.h"
|
||||
#include "vpx/vpx_decoder.h"
|
||||
#include "vpx/vp8dx.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
@@ -187,6 +188,8 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
|
||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
||||
(void) data;
|
||||
|
||||
vpx_rtcd();
|
||||
|
||||
/* This function only allocates space for the vpx_codec_alg_priv_t
|
||||
* structure. More memory may be required at the time the stream
|
||||
* information becomes known.
|
||||
|
@@ -99,6 +99,14 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
|
||||
|
||||
ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
|
||||
ifeq ($(HAVE_SSE2),yes)
|
||||
vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2
|
||||
endif
|
||||
endif
|
||||
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
|
||||
|
@@ -493,7 +493,7 @@ int main(int argc, char **argv) {
|
||||
// Cap CPU & first I-frame size
|
||||
vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6);
|
||||
vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD, 800);
|
||||
vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 2);
|
||||
vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 1);
|
||||
|
||||
max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
|
||||
* ((double) cfg.g_timebase.den / cfg.g_timebase.num)
|
||||
|
@@ -113,6 +113,10 @@ extern "C" {
|
||||
* function directly, to ensure that the ABI version number parameter
|
||||
* is properly initialized.
|
||||
*
|
||||
* If the library was configured with --disable-multithread, this call
|
||||
* is not thread safe and should be guarded with a lock if being used
|
||||
* in a multithreaded context.
|
||||
*
|
||||
* In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
|
||||
* parameter), the storage pointed to by the cfg parameter must be
|
||||
* kept readable and stable until all memory maps have been set.
|
||||
|
@@ -655,6 +655,10 @@ extern "C" {
|
||||
* function directly, to ensure that the ABI version number parameter
|
||||
* is properly initialized.
|
||||
*
|
||||
* If the library was configured with --disable-multithread, this call
|
||||
* is not thread safe and should be guarded with a lock if being used
|
||||
* in a multithreaded context.
|
||||
*
|
||||
* In XMA mode (activated by setting VPX_CODEC_USE_XMA in the flags
|
||||
* parameter), the storage pointed to by the cfg parameter must be
|
||||
* kept readable and stable until all memory maps have been set.
|
||||
|
@@ -23,7 +23,7 @@
|
||||
#define BEGIN int main(void) {
|
||||
#define END return 0; }
|
||||
#else
|
||||
#define DEFINE(sym, val) int sym = val;
|
||||
#define DEFINE(sym, val) const int sym = val;
|
||||
#define BEGIN
|
||||
#define END
|
||||
#endif
|
||||
|
@@ -12,14 +12,14 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
section .text
|
||||
global sym(vpx_reset_mmx_state)
|
||||
global sym(vpx_reset_mmx_state) PRIVATE
|
||||
sym(vpx_reset_mmx_state):
|
||||
emms
|
||||
ret
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
global sym(vpx_winx64_fldcw)
|
||||
global sym(vpx_winx64_fldcw) PRIVATE
|
||||
sym(vpx_winx64_fldcw):
|
||||
sub rsp, 8
|
||||
mov [rsp], rcx ; win x64 specific
|
||||
@@ -28,7 +28,7 @@ sym(vpx_winx64_fldcw):
|
||||
ret
|
||||
|
||||
|
||||
global sym(vpx_winx64_fstcw)
|
||||
global sym(vpx_winx64_fstcw) PRIVATE
|
||||
sym(vpx_winx64_fstcw):
|
||||
sub rsp, 8
|
||||
fstcw [rsp]
|
||||
|
@@ -94,6 +94,31 @@
|
||||
%define sym(x) _ %+ x
|
||||
%endif
|
||||
|
||||
; PRIVATE
|
||||
; Macro for the attribute to hide a global symbol for the target ABI.
|
||||
; This is only active if CHROMIUM is defined.
|
||||
;
|
||||
; Chromium doesn't like exported global symbols due to symbol clashing with
|
||||
; plugins among other things.
|
||||
;
|
||||
; Requires Chromium's patched copy of yasm:
|
||||
; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
|
||||
; http://www.tortall.net/projects/yasm/ticket/236
|
||||
;
|
||||
%ifdef CHROMIUM
|
||||
%ifidn __OUTPUT_FORMAT__,elf32
|
||||
%define PRIVATE :hidden
|
||||
%elifidn __OUTPUT_FORMAT__,elf64
|
||||
%define PRIVATE :hidden
|
||||
%elifidn __OUTPUT_FORMAT__,x64
|
||||
%define PRIVATE
|
||||
%else
|
||||
%define PRIVATE :private_extern
|
||||
%endif
|
||||
%else
|
||||
%define PRIVATE
|
||||
%endif
|
||||
|
||||
; arg()
|
||||
; Return the address specification of the given argument
|
||||
;
|
||||
@@ -181,7 +206,16 @@
|
||||
%endmacro
|
||||
%endif
|
||||
%endif
|
||||
%define HIDDEN_DATA(x) x
|
||||
|
||||
%ifdef CHROMIUM
|
||||
%ifidn __OUTPUT_FORMAT__,macho32
|
||||
%define HIDDEN_DATA(x) x:private_extern
|
||||
%else
|
||||
%define HIDDEN_DATA(x) x
|
||||
%endif
|
||||
%else
|
||||
%define HIDDEN_DATA(x) x
|
||||
%endif
|
||||
%else
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
|
Reference in New Issue
Block a user