Compare commits
105 Commits
sandbox/ho
...
sandbox/jk
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c40ea3f0c6 | ||
![]() |
7725a7eb56 | ||
![]() |
27dad21548 | ||
![]() |
94d4fee08f | ||
![]() |
2debd5b5f7 | ||
![]() |
404e998eb7 | ||
![]() |
d3dfcde0f7 | ||
![]() |
d419b93e3e | ||
![]() |
0ff10bb1f7 | ||
![]() |
bb6bcbccda | ||
![]() |
353246bd60 | ||
![]() |
9d0b2cbbce | ||
![]() |
4f8a166058 | ||
![]() |
6f53e59641 | ||
![]() |
41e6eceb28 | ||
![]() |
7d8199f0c3 | ||
![]() |
19054ab6da | ||
![]() |
fffa2a61d7 | ||
![]() |
c2b43164bd | ||
![]() |
9954d05ca6 | ||
![]() |
a39b5af10b | ||
![]() |
315e3c2518 | ||
![]() |
85e79ce288 | ||
![]() |
c96031da69 | ||
![]() |
cb14764fab | ||
![]() |
e5aaac24bb | ||
![]() |
cc17629f30 | ||
![]() |
13db80c282 | ||
![]() |
40dcae9c2e | ||
![]() |
615c90c948 | ||
![]() |
ddacf1cf69 | ||
![]() |
3273c7b679 | ||
![]() |
eaadfb5869 | ||
![]() |
adaf2b697c | ||
![]() |
58d2e70fc5 | ||
![]() |
0905af38fc | ||
![]() |
bb9c95ea53 | ||
![]() |
350ffe8dae | ||
![]() |
63fc44dfa5 | ||
![]() |
6bf7e2cc37 | ||
![]() |
ffc6aeef14 | ||
![]() |
c0a9cbebe1 | ||
![]() |
bf5f585b0d | ||
![]() |
209def2d72 | ||
![]() |
4aa12b6c5f | ||
![]() |
a870315629 | ||
![]() |
72ebafff51 | ||
![]() |
e5904f2d5e | ||
![]() |
07a7c08aef | ||
![]() |
a5ecaca6a7 | ||
![]() |
b18df82e1d | ||
![]() |
4e7e79f770 | ||
![]() |
385c2a76d1 | ||
![]() |
9e7fec216e | ||
![]() |
317f0da91e | ||
![]() |
4a15e55793 | ||
![]() |
60fde4d342 | ||
![]() |
6d19d40718 | ||
![]() |
f6214d1db8 | ||
![]() |
2d03f073a7 | ||
![]() |
408a8adc15 | ||
![]() |
8f279596cb | ||
![]() |
f3cb9ae459 | ||
![]() |
7cbe684ef5 | ||
![]() |
e9f513d74a | ||
![]() |
dcb23e2aaa | ||
![]() |
11a222f5d9 | ||
![]() |
73207a1d8b | ||
![]() |
27000ed6d9 | ||
![]() |
8a5c255b3d | ||
![]() |
bb30ffc4dc | ||
![]() |
3ee4e1e79f | ||
![]() |
3c18a2bb2e | ||
![]() |
cac54404b9 | ||
![]() |
c4887da39c | ||
![]() |
35bb74a6bd | ||
![]() |
e8e09d33df | ||
![]() |
82266a1ac9 | ||
![]() |
be3e0ff7c3 | ||
![]() |
0def48b60f | ||
![]() |
a3f71ccff6 | ||
![]() |
2caa36aa4f | ||
![]() |
999e155f55 | ||
![]() |
53d8e9dc97 | ||
![]() |
907e98fbb5 | ||
![]() |
58f19cc697 | ||
![]() |
dcaaadd8ed | ||
![]() |
af7d23c9b4 | ||
![]() |
2168a94495 | ||
![]() |
4e149bb447 | ||
![]() |
3bf235a4c9 | ||
![]() |
a69c18980f | ||
![]() |
336aa0b7da | ||
![]() |
eb8b4d9a99 | ||
![]() |
0ee525d6de | ||
![]() |
d3e9409bb0 | ||
![]() |
0822a62f40 | ||
![]() |
0cdfef1e22 | ||
![]() |
8064583d26 | ||
![]() |
419553258d | ||
![]() |
815e1e9fe4 | ||
![]() |
06e7320c3e | ||
![]() |
e867516843 | ||
![]() |
ce6c954d2e | ||
![]() |
15f9bea73b |
@@ -17,15 +17,17 @@ for i; do
|
||||
on_of=1
|
||||
elif [ "$i" == "-v" ]; then
|
||||
verbose=1
|
||||
elif [ "$i" == "-g" ]; then
|
||||
args="${args} --debug"
|
||||
elif [ "$on_of" == "1" ]; then
|
||||
outfile=$i
|
||||
on_of=0
|
||||
on_of=0
|
||||
elif [ -f "$i" ]; then
|
||||
infiles="$infiles $i"
|
||||
elif [ "${i:0:2}" == "-l" ]; then
|
||||
libs="$libs ${i#-l}"
|
||||
elif [ "${i:0:2}" == "-L" ]; then
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
libpaths="${libpaths} ${i#-L}"
|
||||
else
|
||||
args="${args} ${i}"
|
||||
fi
|
||||
|
@@ -78,6 +78,7 @@ Build options:
|
||||
--log=yes|no|FILE file configure log is written to [config.err]
|
||||
--target=TARGET target platform tuple [generic-gnu]
|
||||
--cpu=CPU optimize for a specific cpu rather than a family
|
||||
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]
|
||||
${toggle_extra_warnings} emit harmless warnings (always non-fatal)
|
||||
${toggle_werror} treat warnings as errors, if possible
|
||||
(not available with all compilers)
|
||||
@@ -442,6 +443,9 @@ process_common_cmdline() {
|
||||
;;
|
||||
--cpu=*) tune_cpu="$optval"
|
||||
;;
|
||||
--extra-cflags=*)
|
||||
extra_cflags="${optval}"
|
||||
;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
echo "${CMDLINE_SELECT} ${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null || die_unknown $opt
|
||||
@@ -660,12 +664,12 @@ process_common_toolchain() {
|
||||
elif enabled armv7
|
||||
then
|
||||
check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-ftree-vectorize
|
||||
check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a
|
||||
check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a
|
||||
else
|
||||
check_add_cflags -march=${tgt_isa}
|
||||
check_add_asflags -march=${tgt_isa}
|
||||
fi
|
||||
|
||||
enabled debug && add_asflags -g
|
||||
asm_conversion_cmd="${source_path}/build/make/ads2gas.pl"
|
||||
;;
|
||||
rvct)
|
||||
@@ -690,16 +694,24 @@ process_common_toolchain() {
|
||||
arch_int=${tgt_isa##armv}
|
||||
arch_int=${arch_int%%te}
|
||||
check_add_asflags --pd "\"ARCHITECTURE SETA ${arch_int}\""
|
||||
enabled debug && add_asflags -g
|
||||
add_cflags --gnu
|
||||
add_cflags --enum_is_int
|
||||
add_cflags --wchar32
|
||||
;;
|
||||
esac
|
||||
|
||||
case ${tgt_os} in
|
||||
none*)
|
||||
disable multithread
|
||||
disable os_support
|
||||
;;
|
||||
darwin*)
|
||||
SDK_PATH=/Developer/Platforms/iPhoneOS.platform/Developer
|
||||
TOOLCHAIN_PATH=${SDK_PATH}/usr/bin
|
||||
CC=${TOOLCHAIN_PATH}/gcc
|
||||
AR=${TOOLCHAIN_PATH}/ar
|
||||
LD=${TOOLCHAIN_PATH}/arm-apple-darwin9-gcc-4.2.1
|
||||
LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-gcc-4.2.1
|
||||
AS=${TOOLCHAIN_PATH}/as
|
||||
STRIP=${TOOLCHAIN_PATH}/strip
|
||||
NM=${TOOLCHAIN_PATH}/nm
|
||||
@@ -713,14 +725,14 @@ process_common_toolchain() {
|
||||
add_cflags -arch ${tgt_isa}
|
||||
add_ldflags -arch_only ${tgt_isa}
|
||||
|
||||
add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.1.sdk"
|
||||
add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.sdk"
|
||||
|
||||
# This should be overridable
|
||||
alt_libc=${SDK_PATH}/SDKs/iPhoneOS3.1.sdk
|
||||
alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.2.sdk
|
||||
|
||||
# Add the paths for the alternate libc
|
||||
# for d in usr/include usr/include/gcc/darwin/4.0/; do
|
||||
for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin9/4.0.1/include/; do
|
||||
for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do
|
||||
try_dir="${alt_libc}/${d}"
|
||||
[ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
|
||||
done
|
||||
@@ -742,13 +754,9 @@ process_common_toolchain() {
|
||||
|| die "Must supply --libc when targetting *-linux-rvct"
|
||||
|
||||
# Set up compiler
|
||||
add_cflags --gnu
|
||||
add_cflags --enum_is_int
|
||||
add_cflags --library_interface=aeabi_glibc
|
||||
add_cflags --no_hide_all
|
||||
add_cflags --wchar32
|
||||
add_cflags --dwarf2
|
||||
add_cflags --gnu
|
||||
|
||||
# Set up linker
|
||||
add_ldflags --sysv --no_startup --no_ref_cpp_init
|
||||
@@ -972,6 +980,12 @@ EOF
|
||||
add_cflags -D_LARGEFILE_SOURCE
|
||||
add_cflags -D_FILE_OFFSET_BITS=64
|
||||
fi
|
||||
|
||||
# append any user defined extra cflags
|
||||
if [ -n "${extra_cflags}" ] ; then
|
||||
check_add_cflags ${extra_cflags} || \
|
||||
die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
|
||||
fi
|
||||
}
|
||||
|
||||
process_toolchain() {
|
||||
|
@@ -590,7 +590,7 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
|
||||
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
|
||||
|
||||
fp = fopen("vpx_asm_offsets.asm", "w");
|
||||
fp = fopen("assembly_offsets.asm", "w");
|
||||
|
||||
if (fp == NULL)
|
||||
{
|
||||
|
10
configure
vendored
10
configure
vendored
@@ -40,7 +40,6 @@ Advanced options:
|
||||
${toggle_runtime_cpu_detect} runtime cpu detection
|
||||
${toggle_shared} shared library support
|
||||
${toggle_small} favor smaller size over speed
|
||||
${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
|
||||
${toggle_postproc_visualizer} macro block / block level visualizers
|
||||
|
||||
Codecs:
|
||||
@@ -79,11 +78,13 @@ EOF
|
||||
# alphabetically by architecture, generic-gnu last.
|
||||
all_platforms="${all_platforms} armv5te-linux-rvct"
|
||||
all_platforms="${all_platforms} armv5te-linux-gcc"
|
||||
all_platforms="${all_platforms} armv5te-none-rvct"
|
||||
all_platforms="${all_platforms} armv5te-symbian-gcc"
|
||||
all_platforms="${all_platforms} armv5te-wince-vs8"
|
||||
all_platforms="${all_platforms} armv6-darwin-gcc"
|
||||
all_platforms="${all_platforms} armv6-linux-rvct"
|
||||
all_platforms="${all_platforms} armv6-linux-gcc"
|
||||
all_platforms="${all_platforms} armv6-none-rvct"
|
||||
all_platforms="${all_platforms} armv6-symbian-gcc"
|
||||
all_platforms="${all_platforms} armv6-wince-vs8"
|
||||
all_platforms="${all_platforms} iwmmxt-linux-rvct"
|
||||
@@ -95,6 +96,7 @@ all_platforms="${all_platforms} iwmmxt2-wince-vs8"
|
||||
all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-none-rvct" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} mips32-linux-gcc"
|
||||
all_platforms="${all_platforms} ppc32-darwin8-gcc"
|
||||
all_platforms="${all_platforms} ppc32-darwin9-gcc"
|
||||
@@ -159,6 +161,7 @@ enable fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable md5
|
||||
enable spatial_resampling
|
||||
enable multithread
|
||||
enable os_support
|
||||
|
||||
[ -d ${source_path}/../include ] && enable alt_tree_layout
|
||||
for d in vp8; do
|
||||
@@ -251,8 +254,8 @@ CONFIG_LIST="
|
||||
realtime_only
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
os_support
|
||||
"
|
||||
CMDLINE_SELECT="
|
||||
extra_warnings
|
||||
@@ -291,7 +294,6 @@ CMDLINE_SELECT="
|
||||
realtime_only
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
"
|
||||
|
||||
@@ -300,7 +302,7 @@ process_cmdline() {
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--disable-codecs) for c in ${CODECS}; do disable $c; done ;;
|
||||
*) process_common_cmdline $opt
|
||||
*) process_common_cmdline "$opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
10
examples.mk
10
examples.mk
@@ -93,8 +93,16 @@ vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
|
||||
|
||||
|
||||
# Handle extra library flags depending on codec configuration
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
|
||||
# We should not link to math library (libm) on RVCT
|
||||
# when building for bare-metal targets
|
||||
ifeq ($(CONFIG_OS_SUPPORT), yes)
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
else
|
||||
ifeq ($(CONFIG_GCC), yes)
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
endif
|
||||
endif
|
||||
#
|
||||
# End of specified files. The rest of the build rules should happen
|
||||
# automagically from here.
|
||||
|
31
libs.mk
31
libs.mk
@@ -230,10 +230,39 @@ endif
|
||||
#
|
||||
# Add assembler dependencies for configuration and offsets
|
||||
#
|
||||
#$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(BUILD_PFX)vpx_asm_offsets.asm
|
||||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %.asm.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
|
||||
#
|
||||
# Calculate platform- and compiler-specific offsets for hand coded assembly
|
||||
#
|
||||
ifeq ($(ARCH_ARM), yes)
|
||||
asm_com_offsets.asm: obj_int_extract
|
||||
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
CLEAN-OBJS += asm_com_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
|
||||
|
||||
ifeq ($(CONFIG_VP8_ENCODER), yes)
|
||||
asm_enc_offsets.asm: obj_int_extract
|
||||
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
CLEAN-OBJS += asm_enc_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP8_DECODER), yes)
|
||||
asm_dec_offsets.asm: obj_int_extract
|
||||
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
CLEAN-OBJS += asm_dec_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
|
||||
endif
|
||||
endif
|
||||
|
||||
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
|
||||
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
|
||||
|
||||
|
@@ -20,8 +20,6 @@
|
||||
* Still in the public domain.
|
||||
*/
|
||||
|
||||
#include <sys/types.h> /* for stupid systems */
|
||||
|
||||
#include <string.h> /* for memcpy() */
|
||||
|
||||
#include "md5_utils.h"
|
||||
|
@@ -15,19 +15,19 @@
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *dst_ptr,
|
||||
; r2 unsigned int src_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is transposed stroed in output array to make it easy for second pass filtering.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
mov r12, r3 ; outer-loop counter
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
@@ -38,10 +38,10 @@
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
|
||||
mov r3, r3, lsl #1 ; output_height*2
|
||||
mov r3, r3, lsl #1 ; height*2
|
||||
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
|
||||
|
||||
mov r11, r1 ; save output_ptr for each row
|
||||
mov r11, r1 ; save dst_ptr for each row
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_1st_filter
|
||||
@@ -140,17 +140,17 @@
|
||||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 int output_pitch,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
; r1 unsigned char *dst_ptr,
|
||||
; r2 int dst_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
|
||||
|
@@ -243,8 +243,6 @@ skip_secondpass_hloop
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
_filter8_coeff_
|
||||
|
@@ -10,128 +10,29 @@
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include "filter.h"
|
||||
#include "subpixel.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
static const short bilinear_filters[8][2] =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if 0
|
||||
void vp8_filter_block2d_bil_first_pass_6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_bil_second_pass_6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i,j;
|
||||
int Temp;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[output_width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
/*src_ptr += src_pixels_per_line - output_width;*/
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#include "arm/bilinearfilter_arm.h"
|
||||
|
||||
void vp8_filter_block2d_bil_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
|
||||
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
/* pixel_step = 1; */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
@@ -148,8 +49,8 @@ void vp8_bilinear_predict4x4_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
@@ -167,8 +68,8 @@ void vp8_bilinear_predict8x8_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
@@ -186,8 +87,8 @@ void vp8_bilinear_predict8x4_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
@@ -205,8 +106,8 @@ void vp8_bilinear_predict16x16_armv6
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
||||
|
35
vp8/common/arm/bilinearfilter_arm.h
Normal file
35
vp8/common/arm/bilinearfilter_arm.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef BILINEARFILTER_ARM_H
|
||||
#define BILINEARFILTER_ARM_H
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#endif /* BILINEARFILTER_ARM_H */
|
@@ -11,26 +11,10 @@
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "filter.h"
|
||||
#include "subpixel.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
|
||||
{
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
@@ -93,11 +77,11 @@ void vp8_sixtap_predict_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
@@ -129,47 +113,6 @@ void vp8_sixtap_predict_armv6
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void vp8_sixtap_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
|
||||
/*if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
|
||||
}*/
|
||||
/* Hfilter is null. Second pass only */
|
||||
/*else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
|
||||
else*/
|
||||
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
|
||||
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
|
||||
/*}*/
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
@@ -182,10 +125,10 @@ void vp8_sixtap_predict8x8_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
@@ -224,10 +167,10 @@ void vp8_sixtap_predict16x16_armv6
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
|
@@ -41,13 +41,13 @@ void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -57,7 +57,7 @@ void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
@@ -65,13 +65,13 @@ void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -81,7 +81,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
@@ -94,10 +94,10 @@ void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -122,10 +122,10 @@ void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -148,10 +148,10 @@ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -161,7 +161,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
@@ -169,10 +169,10 @@ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -182,7 +182,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
@@ -195,7 +195,7 @@ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -220,7 +220,7 @@ void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
|
@@ -350,10 +350,7 @@ filt_blk2d_spo16x16_loop_neon
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter16_coeff_
|
||||
DCD bifilter16_coeff
|
||||
bifilter16_coeff
|
||||
|
@@ -123,10 +123,7 @@ skip_secondpass_filter
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter4_coeff_
|
||||
DCD bifilter4_coeff
|
||||
bifilter4_coeff
|
||||
|
@@ -128,10 +128,7 @@ skip_secondpass_filter
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter8x4_coeff_
|
||||
DCD bifilter8x4_coeff
|
||||
bifilter8x4_coeff
|
||||
|
@@ -176,10 +176,7 @@ skip_secondpass_filter
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_bifilter8_coeff_
|
||||
DCD bifilter8_coeff
|
||||
bifilter8_coeff
|
||||
|
@@ -397,7 +397,8 @@
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
AREA loopfilter_dat, DATA, READONLY
|
||||
;-----------------
|
||||
|
||||
_lf_coeff_
|
||||
DCD lf_coeff
|
||||
lf_coeff
|
||||
|
@@ -104,10 +104,7 @@
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_lfhy_coeff_
|
||||
DCD lfhy_coeff
|
||||
lfhy_coeff
|
||||
|
@@ -145,10 +145,7 @@
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_vlfy_coeff_
|
||||
DCD vlfy_coeff
|
||||
vlfy_coeff
|
||||
|
@@ -505,7 +505,8 @@
|
||||
bx lr
|
||||
ENDP ; |vp8_mbloop_filter_neon|
|
||||
|
||||
AREA mbloopfilter_dat, DATA, READONLY
|
||||
;-----------------
|
||||
|
||||
_mblf_coeff_
|
||||
DCD mblf_coeff
|
||||
mblf_coeff
|
||||
|
@@ -113,10 +113,7 @@
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA idct4x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_idct_coeff_
|
||||
DCD idct_coeff
|
||||
idct_coeff
|
||||
|
@@ -476,10 +476,7 @@ secondpass_only_inner_loop_neon
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter16_coeff_
|
||||
DCD filter16_coeff
|
||||
filter16_coeff
|
||||
|
@@ -407,10 +407,7 @@ secondpass_filter4x4_only
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter4_coeff_
|
||||
DCD filter4_coeff
|
||||
filter4_coeff
|
||||
|
@@ -458,10 +458,7 @@ secondpass_filter8x4_only
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
|
@@ -509,10 +509,7 @@ filt_blk2d_spo8x8_loop_neon
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
|
49
vp8/common/asm_com_offsets.c
Normal file
49
vp8/common/asm_com_offsets.c
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
/* Compile-time assertion.
 * Declares and defines a static function assert_<name> whose body is a switch
 * with the labels `case 0` and `case !!(cond)`. When cond is false, !!(cond)
 * is 0, the two labels collide and compilation fails; when cond is true the
 * labels are 0 and 1 and the definition is accepted.
 * NOTE(review): UNUSED is not defined in this file; presumably an
 * unused-function attribute supplied by an included header -- confirm. */
#define ct_assert(name,cond) \
|
||||
static void assert_##name(void) UNUSED;\
|
||||
static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
|
||||
|
||||
/* Defines a global int named sym, initialised to val.
 * The expansion already ends in ';', so a use written as `DEFINE(a, b);`
 * leaves one extra empty ';' after the definition. */
#define DEFINE(sym, val) int sym = val;
|
||||
|
||||
/*
|
||||
#define BLANK() asm volatile("\n->" : : )
|
||||
*/
|
||||
|
||||
/*
|
||||
* int main(void)
|
||||
* {
|
||||
*/
|
||||
|
||||
/* vpx_scale: byte offsets (via offsetof) of individual YV12_BUFFER_CONFIG
 * fields, each emitted as a global int through DEFINE.
 * NOTE(review): going by the file name, these are presumably consumed by the
 * assembly sources -- confirm against the build rules. */
|
||||
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
|
||||
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
|
||||
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
|
||||
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
|
||||
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
|
||||
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
|
||||
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
|
||||
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
|
||||
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
|
||||
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
||||
/*
|
||||
* return 0;
|
||||
* }
|
||||
*/
|
@@ -10,13 +10,10 @@
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
|
||||
static const int bilinear_filters[8][2] =
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
@@ -28,8 +25,7 @@ static const int bilinear_filters[8][2] =
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
|
||||
static const short sub_pel_filters[8][6] =
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
@@ -40,9 +36,6 @@ static const short sub_pel_filters[8][6] =
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
void vp8_filter_block2d_first_pass
|
||||
@@ -146,7 +139,7 @@ void vp8_filter_block2d
|
||||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; /* Temp data bufffer used in filtering */
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
@@ -195,8 +188,8 @@ void vp8_sixtap_predict_c
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
@@ -212,10 +205,10 @@ void vp8_sixtap_predict8x8_c
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
@@ -238,10 +231,10 @@ void vp8_sixtap_predict8x4_c
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
@@ -264,11 +257,11 @@ void vp8_sixtap_predict16x16_c
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data bufffer used in filtering */
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
@@ -283,57 +276,50 @@ void vp8_sixtap_predict16x16_c
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* UINT32 pixel_step : Offset between filter input samples (see notes).
|
||||
* UINT32 output_height : Input block height.
|
||||
* UINT32 output_width : Input block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_stride : Stride of source block.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT16 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement first-pass
|
||||
* of 2-D separable filter.
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the horizontal direction to produce the filtered output
|
||||
* block. Used to implement first-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces UINT16 (unsigned short) output for the next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,60 +327,51 @@ void vp8_filter_block2d_bil_first_pass
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* UINT32 pixel_step : Offset between filter input samples (see notes).
|
||||
* UINT32 output_height : Input block height.
|
||||
* UINT32 output_width : Input block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
* INPUTS : UINT16 *src_ptr : Pointer to source block.
|
||||
* UINT32 dst_pitch : Destination block pitch.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT16 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
|
||||
* OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement second-pass
|
||||
* of 2-D separable filter.
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires UINT16 (unsigned short) input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -404,11 +381,14 @@ void vp8_filter_block2d_bil_second_pass
|
||||
* ROUTINE : filter_block2d_bil
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* UINT32 src_pitch : Stride of source block.
|
||||
* UINT32 dst_pitch : Stride of destination block.
|
||||
* INT16 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT16 *VFilter : Array of 2 vertical filter taps.
|
||||
* INT32 Width : Block width
|
||||
* INT32 Height : Block height
|
||||
*
|
||||
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
|
||||
* OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
@@ -422,23 +402,23 @@ void vp8_filter_block2d_bil_second_pass
|
||||
void vp8_filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const int *HFilter,
|
||||
const int *VFilter,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[17*16]; /* Temp data bufffer used in filtering */
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
|
||||
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
|
||||
vp8_filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
@@ -452,11 +432,11 @@ void vp8_bilinear_predict4x4_c
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
@@ -490,11 +470,11 @@ void vp8_bilinear_predict8x8_c
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
@@ -510,11 +490,11 @@ void vp8_bilinear_predict8x4_c
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
@@ -530,11 +510,11 @@ void vp8_bilinear_predict16x16_c
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@@ -9,14 +9,14 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DETOKENIZE_ARM_H
|
||||
#define DETOKENIZE_ARM_H
|
||||
#ifndef FILTER_H
|
||||
#define FILTER_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
void vp8_init_detokenizer(VP8D_COMP *dx);
|
||||
void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
|
||||
#endif
|
||||
#endif
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
#endif
|
||||
extern const short vp8_bilinear_filters[8][2];
|
||||
extern const short vp8_sub_pel_filters[8][6];
|
||||
|
||||
#endif //FILTER_H
|
@@ -11,47 +11,9 @@
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
#define FINDNEAR_SEARCH_SITES 3
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
|
||||
typedef union
|
||||
{
|
||||
unsigned int as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
/* Store x's motion vector into *mvp. Both components are negated when the
 * sign bias of x's reference frame differs from the sign bias of refframe.
 *
 *   x                   - mode info whose mbmi.mv and mbmi.ref_frame are read
 *   refframe            - index into ref_frame_sign_bias for the target frame
 *   mvp                 - receives the (possibly negated) vector
 *   ref_frame_sign_bias - sign-bias table indexed by reference frame
 */
static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
{
    const int flip = ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe];
    MV biased = x->mbmi.mv.as_mv;

    if (flip)
    {
        biased.row *= -1;
        biased.col *= -1;
    }

    mvp->as_mv = biased;
}
|
||||
|
||||
|
||||
/* Clamp *mv in place against the edge distances stored in xd:
 * row is limited to [mb_to_top_edge - LEFT_TOP_MARGIN,
 *                    mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN] and
 * col to [mb_to_left_edge - LEFT_TOP_MARGIN,
 *         mb_to_right_edge + RIGHT_BOTTOM_MARGIN].
 * The lower limit is tested first for each component. */
void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
{
    /* row against the top/bottom limits */
    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
    {
        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
    }
    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
    {
        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
    }

    /* col against the left/right limits */
    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
    {
        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
    }
    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
    {
        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
    }
}
|
||||
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
@@ -82,7 +44,7 @@ void vp8_find_near_mvs
|
||||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(above, refframe, mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
@@ -97,7 +59,7 @@ void vp8_find_near_mvs
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(left, refframe, &this_mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
@@ -119,7 +81,7 @@ void vp8_find_near_mvs
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(aboveleft, refframe, &this_mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
|
@@ -17,6 +17,41 @@
|
||||
#include "modecont.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
typedef union
|
||||
{
|
||||
unsigned int as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
/* Negate both components of the motion vector in *mvp when
 * refmb_ref_frame_sign_bias differs from ref_frame_sign_bias[refframe];
 * otherwise leave *mvp untouched.
 *
 *   refmb_ref_frame_sign_bias - sign bias of the frame the vector came from
 *   refframe                  - index into ref_frame_sign_bias for the target
 *   mvp                       - vector to adjust in place
 *   ref_frame_sign_bias       - sign-bias table indexed by reference frame
 */
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
{
    /* Matching biases: nothing to flip. */
    if (refmb_ref_frame_sign_bias == ref_frame_sign_bias[refframe])
        return;

    mvp->as_mv.row *= -1;
    mvp->as_mv.col *= -1;
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
/* Clamp *mv in place so that col stays within
 * [mb_to_left_edge - LEFT_TOP_MARGIN, mb_to_right_edge + RIGHT_BOTTOM_MARGIN]
 * and row within
 * [mb_to_top_edge - LEFT_TOP_MARGIN, mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN],
 * using the edge distances stored in xd. The lower limit is checked first. */
static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
{
    if ((xd->mb_to_left_edge - LEFT_TOP_MARGIN) > mv->col)
    {
        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
    }
    else if ((xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) < mv->col)
    {
        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
    }

    if ((xd->mb_to_top_edge - LEFT_TOP_MARGIN) > mv->row)
    {
        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
    }
    else if ((xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) < mv->row)
    {
        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
    }
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
@@ -35,8 +70,4 @@ const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);
|
||||
|
||||
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -28,13 +28,13 @@ void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -44,7 +44,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
@@ -52,13 +52,13 @@ void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -68,7 +68,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
@@ -81,10 +81,10 @@ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -109,10 +109,10 @@ void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -137,8 +137,6 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
|
||||
int block_inside_limit = 0;
|
||||
int HEVThresh;
|
||||
const int yhedge_boost = 2;
|
||||
const int uvhedge_boost = 2;
|
||||
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
@@ -182,15 +180,9 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl + yhedge_boost;
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
lfi[i].mbflim[j] = filt_lvl + 2;
|
||||
lfi[i].flim[j] = filt_lvl;
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl + uvhedge_boost;
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
lfi[i].uvflim[j] = filt_lvl;
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -249,57 +241,52 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
/*lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
lfi[i].mbflim[j] = filt_lvl+2;*/
|
||||
/*lfi[i].flim[j] = filt_lvl;*/
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
/*lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
/*lfi[i].uvflim[j] = filt_lvl;*/
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
|
||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level)
|
||||
{
|
||||
MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
|
||||
|
||||
if (mbd->mode_ref_lf_delta_enabled)
|
||||
{
|
||||
/* Apply delta for reference frame */
|
||||
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
|
||||
/* Apply delta for mode */
|
||||
if (mbmi->ref_frame == INTRA_FRAME)
|
||||
{
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
if (mbmi->mode == B_PRED)
|
||||
*filter_level += mbd->mode_lf_deltas[0];
|
||||
filter_level += mbd->mode_lf_deltas[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Zero motion mode */
|
||||
if (mbmi->mode == ZEROMV)
|
||||
*filter_level += mbd->mode_lf_deltas[1];
|
||||
filter_level += mbd->mode_lf_deltas[1];
|
||||
|
||||
/* Split MB motion mode */
|
||||
else if (mbmi->mode == SPLITMV)
|
||||
*filter_level += mbd->mode_lf_deltas[3];
|
||||
filter_level += mbd->mode_lf_deltas[3];
|
||||
|
||||
/* All other inter motion modes (Nearest, Near, New) */
|
||||
else
|
||||
*filter_level += mbd->mode_lf_deltas[2];
|
||||
filter_level += mbd->mode_lf_deltas[2];
|
||||
}
|
||||
|
||||
/* Range check */
|
||||
if (*filter_level > MAX_LOOP_FILTER)
|
||||
*filter_level = MAX_LOOP_FILTER;
|
||||
else if (*filter_level < 0)
|
||||
*filter_level = 0;
|
||||
if (filter_level > MAX_LOOP_FILTER)
|
||||
filter_level = MAX_LOOP_FILTER;
|
||||
else if (filter_level < 0)
|
||||
filter_level = 0;
|
||||
}
|
||||
return filter_level;
|
||||
}
|
||||
|
||||
|
||||
@@ -373,7 +360,7 @@ void vp8_loop_filter_frame
|
||||
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
@@ -473,7 +460,7 @@ void vp8_loop_filter_frame_yonly
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
|
||||
/* Apply any context driven MB level adjustment */
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
filter_level = vp8_adjust_mb_lf_value(mbd, filter_level);
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
|
@@ -32,12 +32,6 @@ typedef struct
|
||||
DECLARE_ALIGNED(16, signed char, flim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, thr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbthr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvlim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvthr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvmbflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvmbthr[16]);
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
|
@@ -200,7 +200,7 @@ typedef struct VP8Common
|
||||
} VP8_COMMON;
|
||||
|
||||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
|
||||
int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level);
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm);
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
|
||||
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||
|
@@ -56,10 +56,10 @@ void loop_filter_mbh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned ch
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void)simpler_lpf;
|
||||
mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr);
|
||||
mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr);
|
||||
|
||||
if (u_ptr)
|
||||
mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr);
|
||||
mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr);
|
||||
}
|
||||
|
||||
void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -77,10 +77,10 @@ void loop_filter_mbv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned ch
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void)simpler_lpf;
|
||||
mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr);
|
||||
mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr);
|
||||
|
||||
if (u_ptr)
|
||||
mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr);
|
||||
mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr);
|
||||
}
|
||||
|
||||
void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -104,7 +104,7 @@ void loop_filter_bh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned cha
|
||||
loop_filter_horizontal_edge_y_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr);
|
||||
|
||||
if (u_ptr)
|
||||
loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr);
|
||||
loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr);
|
||||
}
|
||||
|
||||
void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
@@ -127,7 +127,7 @@ void loop_filter_bv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned cha
|
||||
loop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->flim, lfi->lim, lfi->thr);
|
||||
|
||||
if (u_ptr)
|
||||
loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr);
|
||||
loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr);
|
||||
}
|
||||
|
||||
void loop_filter_bvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
|
@@ -14,6 +14,8 @@
|
||||
|
||||
#define VPXINFINITE 10000 /* 10second. */
|
||||
|
||||
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
|
||||
|
||||
/* Thread management macros */
|
||||
#ifdef _WIN32
|
||||
/* Win32 */
|
||||
@@ -88,4 +90,6 @@
|
||||
#define x86_pause_hint()
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
|
||||
|
||||
#endif
|
||||
|
@@ -45,13 +45,13 @@ void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
|
||||
@@ -71,13 +71,13 @@ void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
|
||||
@@ -102,10 +102,10 @@ void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -132,10 +132,10 @@ void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -159,10 +159,10 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -173,7 +173,7 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
|
||||
@@ -182,10 +182,10 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -196,7 +196,7 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
|
||||
@@ -210,7 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
|
||||
@@ -237,7 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -14,7 +14,6 @@
|
||||
#include "blockd.h"
|
||||
#include "pragmas.h"
|
||||
#include "postproc.h"
|
||||
#include "dboolhuff.h"
|
||||
#include "dequantize.h"
|
||||
#include "onyxd_int.h"
|
||||
|
||||
@@ -35,12 +34,6 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
|
||||
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
|
||||
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
|
||||
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
|
||||
#if 0 /*For use with RTCD, when implemented*/
|
||||
pbi->dboolhuff.start = vp8dx_start_decode_c;
|
||||
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
|
||||
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
|
||||
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -54,12 +47,6 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
|
||||
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
|
||||
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
|
||||
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
|
||||
#if 0 /*For use with RTCD, when implemented*/
|
||||
pbi->dboolhuff.start = vp8dx_start_decode_c;
|
||||
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
|
||||
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
|
||||
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -1,163 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_decode_value_v6|
|
||||
EXPORT |vp8dx_start_decode_v6|
|
||||
EXPORT |vp8dx_stop_decode_v6|
|
||||
EXPORT |vp8dx_decode_bool_v6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
INCLUDE vpx_asm_offsets.asm
|
||||
|
||||
br RN r0
|
||||
prob RN r1
|
||||
bits RN r1
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
; int z = 0;
|
||||
; int bit;
|
||||
; for ( bit=bits-1; bit>=0; bit-- )
|
||||
; {
|
||||
; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
|
||||
; }
|
||||
; return z;
|
||||
|
||||
;int vp8_decode_value_v6 ( BOOL_DECODER *br, int bits )
|
||||
|vp8_decode_value_v6| PROC
|
||||
stmdb sp!, {r4 - r6, lr}
|
||||
mov r4, br
|
||||
mov r5, bits
|
||||
mov r6, #0
|
||||
|
||||
subs r5, r5, #1
|
||||
bmi decode_value_exit
|
||||
|
||||
decode_value_loop
|
||||
mov prob, #0x80
|
||||
mov br, r4
|
||||
bl vp8dx_decode_bool_v6_internal ; needed for conversion to s file
|
||||
orr r6, r6, r0, lsl r5
|
||||
subs r5, r5, #1
|
||||
bpl decode_value_loop
|
||||
|
||||
decode_value_exit
|
||||
mov r0, r6
|
||||
ldmia sp!, {r4 - r6, pc}
|
||||
ENDP ; |vp8_decode_value_v6|
|
||||
|
||||
|
||||
;void vp8dx_start_decode_v6 ( BOOL_DECODER *br, unsigned char *source )
|
||||
|vp8dx_start_decode_v6| PROC
|
||||
stmdb sp!, {r4 - r5, lr}
|
||||
mov r2, #0
|
||||
mov r3, #255
|
||||
|
||||
str r2, [br, #bool_decoder_lowvalue]
|
||||
str r3, [br, #bool_decoder_range]
|
||||
str r1, [br, #bool_decoder_buffer]
|
||||
|
||||
mov r3, #8
|
||||
mov r2, #4
|
||||
str r3, [br, #bool_decoder_count]
|
||||
str r2, [br, #bool_decoder_pos]
|
||||
|
||||
ldrb r2, [r1, #3]
|
||||
ldrb r3, [r1, #2]
|
||||
ldrb r4, [r1, #1]
|
||||
ldrb r5, [r1]
|
||||
|
||||
orr r1, r2, r3, lsl #8
|
||||
orr r1, r1, r4, lsl #16
|
||||
orr r1, r1, r5, lsl #24
|
||||
|
||||
str r1, [br, #bool_decoder_value]
|
||||
|
||||
ldmia sp!, {r4 - r5, pc}
|
||||
ENDP ; |vp8dx_start_decode_v6|
|
||||
|
||||
|
||||
;void vp8dx_stop_decode_v6 ( BOOL_DECODER *bc );
|
||||
|vp8dx_stop_decode_v6| PROC
|
||||
mov pc, lr
|
||||
ENDP ; |vp8dx_stop_decode_v6|
|
||||
|
||||
|
||||
; bigsplit RN r1
|
||||
; buffer_v RN r1
|
||||
; count_v RN r4
|
||||
; range_v RN r2
|
||||
; value_v RN r3
|
||||
; pos_v RN r5
|
||||
; split RN r6
|
||||
; bit RN lr
|
||||
;int vp8dx_decode_bool_v6 ( BOOL_DECODER *br, int probability )
|
||||
|vp8dx_decode_bool_v6| PROC
|
||||
vp8dx_decode_bool_v6_internal
|
||||
stmdb sp!, {r4 - r6, lr}
|
||||
|
||||
ldr r2, [br, #bool_decoder_range]
|
||||
ldr r3, [br, #bool_decoder_value]
|
||||
|
||||
mov r6, r2, lsl #8
|
||||
sub r6, r6, #256 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
mov r12, #1
|
||||
smlawb r6, r6, prob, r12
|
||||
|
||||
mov lr, #0
|
||||
subs r5, r3, r6, lsl #24
|
||||
|
||||
;cmp r3, r1
|
||||
movhs lr, #1
|
||||
movhs r3, r5
|
||||
subhs r2, r2, r6
|
||||
movlo r2, r6
|
||||
|
||||
cmp r2, #0x80
|
||||
blt range_less_0x80
|
||||
;strd r2, r3, [br, #bool_decoder_range]
|
||||
str r2, [br, #bool_decoder_range]
|
||||
str r3, [br, #bool_decoder_value]
|
||||
mov r0, lr
|
||||
ldmia sp!, {r4 - r6, pc}
|
||||
|
||||
range_less_0x80
|
||||
ldr r5, [br, #bool_decoder_pos]
|
||||
ldr r1, [br, #bool_decoder_buffer]
|
||||
ldr r4, [br, #bool_decoder_count]
|
||||
add r1, r1, r5
|
||||
|
||||
clz r12, r2
|
||||
sub r12, r12, #24
|
||||
subs r4, r4, r12
|
||||
ldrleb r6, [r1], #1
|
||||
mov r2, r2, lsl r12
|
||||
mov r3, r3, lsl r12
|
||||
addle r4, r4, #8
|
||||
rsble r12, r4, #8
|
||||
addle r5, r5, #1
|
||||
orrle r3, r3, r6, lsl r12
|
||||
|
||||
;strd r2, r3, [br, #bool_decoder_range]
|
||||
;strd r4, r5, [br, #bool_decoder_count]
|
||||
str r2, [br, #bool_decoder_range]
|
||||
str r3, [br, #bool_decoder_value]
|
||||
str r4, [br, #bool_decoder_count]
|
||||
str r5, [br, #bool_decoder_pos]
|
||||
|
||||
mov r0, lr
|
||||
|
||||
ldmia sp!, {r4 - r6, pc}
|
||||
ENDP ; |vp8dx_decode_bool_v6|
|
||||
|
||||
END
|
@@ -1,43 +0,0 @@
|
||||
#ifndef DBOOLHUFF_ARM_H
|
||||
#define DBOOLHUFF_ARM_H
|
||||
|
||||
/* JLK
|
||||
* There are currently no arm-optimized versions of
|
||||
* these functions. As they are implemented, they
|
||||
* can be uncommented below and added to
|
||||
* arm/dsystemdependent.c
|
||||
*
|
||||
* The existing asm code is likely so different as
|
||||
* to be useless. However, its been left (for now)
|
||||
* for reference.
|
||||
*/
|
||||
#if 0
|
||||
#if HAVE_ARMV6
|
||||
#undef vp8_dbool_start
|
||||
#define vp8_dbool_start vp8dx_start_decode_v6
|
||||
|
||||
#undef vp8_dbool_fill
|
||||
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
|
||||
|
||||
#undef vp8_dbool_debool
|
||||
#define vp8_dbool_debool vp8_decode_bool_v6
|
||||
|
||||
#undef vp8_dbool_devalue
|
||||
#define vp8_dbool_devalue vp8_decode_value_v6
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#undef vp8_dbool_start
|
||||
#define vp8_dbool_start vp8dx_start_decode_neon
|
||||
|
||||
#undef vp8_dbool_fill
|
||||
#define vp8_dbool_fill vp8_bool_decoder_fill_neon
|
||||
|
||||
#undef vp8_dbool_debool
|
||||
#define vp8_dbool_debool vp8_decode_bool_neon
|
||||
|
||||
#undef vp8_dbool_devalue
|
||||
#define vp8_dbool_devalue vp8_decode_value_neon
|
||||
#endif /* HAVE_ARMV7 */
|
||||
#endif
|
||||
#endif /* DBOOLHUFF_ARM_H */
|
@@ -1,320 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_decode_mb_tokens_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
INCLUDE vpx_asm_offsets.asm
|
||||
|
||||
l_qcoeff EQU 0
|
||||
l_i EQU 4
|
||||
l_type EQU 8
|
||||
l_stop EQU 12
|
||||
l_c EQU 16
|
||||
l_l_ptr EQU 20
|
||||
l_a_ptr EQU 24
|
||||
l_bc EQU 28
|
||||
l_coef_ptr EQU 32
|
||||
l_stacksize EQU 64
|
||||
|
||||
|
||||
;; constant offsets -- these should be created at build time
|
||||
c_block2above_offset EQU 25
|
||||
c_entropy_nodes EQU 11
|
||||
c_dct_eob_token EQU 11
|
||||
|
||||
|vp8_decode_mb_tokens_v6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
sub sp, sp, #l_stacksize
|
||||
mov r7, r1 ; type
|
||||
mov r9, r0 ; detoken
|
||||
|
||||
ldr r1, [r9, #detok_current_bc]
|
||||
ldr r0, [r9, #detok_qcoeff_start_ptr]
|
||||
mov r11, #0 ; i
|
||||
mov r3, #16 ; stop
|
||||
|
||||
cmp r7, #1 ; type ?= 1
|
||||
addeq r11, r11, #24 ; i = 24
|
||||
addeq r3, r3, #8 ; stop = 24
|
||||
addeq r0, r0, #3, 24 ; qcoefptr += 24*16
|
||||
|
||||
str r0, [sp, #l_qcoeff]
|
||||
str r11, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
str r1, [sp, #l_bc]
|
||||
|
||||
add lr, r9, r7, lsl #2 ; detoken + type*4
|
||||
|
||||
ldr r8, [r1, #bool_decoder_user_buffer]
|
||||
|
||||
ldr r10, [lr, #detok_coef_probs]
|
||||
ldr r5, [r1, #bool_decoder_count]
|
||||
ldr r6, [r1, #bool_decoder_range]
|
||||
ldr r4, [r1, #bool_decoder_value]
|
||||
|
||||
str r10, [sp, #l_coef_ptr]
|
||||
|
||||
BLOCK_LOOP
|
||||
ldr r3, [r9, #detok_ptr_block2leftabove]
|
||||
ldr r1, [r9, #detok_L]
|
||||
ldr r2, [r9, #detok_A]
|
||||
ldrb r12, [r3, r11]! ; block2left[i]
|
||||
ldrb r3, [r3, #c_block2above_offset]; block2above[i]
|
||||
|
||||
cmp r7, #0 ; c = !type
|
||||
moveq r7, #1
|
||||
movne r7, #0
|
||||
|
||||
ldrb r0, [r1, r12]! ; *(L += block2left[i])
|
||||
ldrb r3, [r2, r3]! ; *(A += block2above[i])
|
||||
mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
|
||||
|
||||
; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
|
||||
cmp r0, #0 ; *l ?= 0
|
||||
movne r0, #1
|
||||
cmp r3, #0 ; *a ?= 0
|
||||
addne r0, r0, #1 ; t
|
||||
|
||||
str r1, [sp, #l_l_ptr] ; save &l
|
||||
str r2, [sp, #l_a_ptr] ; save &a
|
||||
smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
|
||||
mov r1, #0 ; t = 0
|
||||
str r7, [sp, #l_c]
|
||||
|
||||
;align 4
|
||||
COEFF_LOOP
|
||||
ldr r3, [r9, #detok_ptr_coef_bands_x]
|
||||
ldr lr, [r9, #detok_coef_tree_ptr]
|
||||
;STALL
|
||||
ldrb r3, [r3, r7] ; coef_bands_x[c]
|
||||
;STALL
|
||||
;STALL
|
||||
add r0, r0, r3 ; Prob += coef_bands_x[c]
|
||||
|
||||
get_token_loop
|
||||
ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
sub r3, r3, #256 ; (range << 8) - (1 << 8)
|
||||
mov r10, #1 ; 1
|
||||
|
||||
smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
|
||||
;++
|
||||
|
||||
subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
|
||||
addhs r1, r1, #1 ; t += 1
|
||||
movhs r4, r3 ; value -= bigsplit (split << 24)
|
||||
subhs r2, r6, r2 ; range -= split
|
||||
; movlo r6, r2 ; range = split
|
||||
|
||||
ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
|
||||
sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range <<= shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
|
||||
; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; 24 - count
|
||||
addle r8, r8, #1 ; bufptr++
|
||||
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
|
||||
|
||||
cmp r1, #0 ; t ?= 0
|
||||
bgt get_token_loop ; while (t > 0)
|
||||
|
||||
cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
|
||||
beq END_OF_BLOCK ; break
|
||||
|
||||
rsb lr, r1, #0 ; v = -t;
|
||||
|
||||
cmp lr, #4 ; if(v > FOUR_TOKEN)
|
||||
ble SKIP_EXTRABITS
|
||||
|
||||
ldr r3, [r9, #detok_teb_base_ptr]
|
||||
mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count
|
||||
add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
|
||||
|
||||
ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
|
||||
ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
|
||||
|
||||
extrabits_loop
|
||||
add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
|
||||
|
||||
ldrb r2, [r3, #4] ; probability. why +4?
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
sub r3, r3, #256 ; range << 8 + 1 << 8
|
||||
|
||||
smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
ldrb r12, [r8] ; *bufptr
|
||||
;++
|
||||
|
||||
subs r10, r4, r2, lsl #24 ; value - (split<<24)
|
||||
movhs r4, r10 ; value = value - (split << 24)
|
||||
subhs r2, r6, r2 ; range = range - split
|
||||
addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; shift - leading zeros in split
|
||||
sub r3, r3, #24 ; don't count first 3 bytes
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range = range << shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
|
||||
addle r5, r5, #8 ; count += BR_COUNT
|
||||
addle r8, r8, #1 ; bufptr++
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
subs r0, r0, #1 ; bits_count --
|
||||
bpl extrabits_loop
|
||||
|
||||
|
||||
SKIP_EXTRABITS
|
||||
ldr r11, [sp, #l_qcoeff]
|
||||
ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
|
||||
|
||||
cmp r1, #0 ; check for nonzero token - if (t)
|
||||
beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table chec
|
||||
|
||||
add r3, r6, #1 ; range + 1
|
||||
mov r2, r3, lsr #1 ; split = (range + 1) >> 1
|
||||
|
||||
subs r3, r4, r2, lsl #24 ; value - (split<<24)
|
||||
movhs r4, r3 ; value -= (split << 24)
|
||||
subhs r2, r6, r2 ; range -= split
|
||||
mvnhs r3, lr ; -v
|
||||
addhs lr, r3, #1 ; v = (v ^ -1) + 1
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; leading 0s in split
|
||||
sub r3, r3, #24 ; shift
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range <<= shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
ldrleb r2, [r8], #1 ; *(bufptr++)
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
cmn r1, #1 ; t < -ONE_TOKEN
|
||||
|
||||
addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
mvn r1, #1 ; t = -1 ???? C is -2
|
||||
|
||||
SKIP_EOB_CHECK
|
||||
ldr r7, [sp, #l_c] ; c
|
||||
ldr r3, [r9, #detok_scan]
|
||||
add r1, r1, #2 ; t+= 2
|
||||
cmp r7, #15 ; c should will be one higher
|
||||
|
||||
ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
|
||||
add r7, r7, #1 ; c++
|
||||
add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
|
||||
|
||||
str r7, [sp, #l_c] ; store c
|
||||
strh lr, [r3] ; qcoef_ptr[scan[c]] = v
|
||||
|
||||
blt COEFF_LOOP
|
||||
|
||||
sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
|
||||
|
||||
END_OF_BLOCK
|
||||
ldr r3, [sp, #l_type] ; type
|
||||
ldr r10, [sp, #l_coef_ptr] ; coef_ptr
|
||||
ldr r0, [sp, #l_qcoeff] ; qcoeff
|
||||
ldr r11, [sp, #l_i] ; i
|
||||
ldr r12, [sp, #l_stop] ; stop
|
||||
|
||||
cmp r3, #0 ; type ?= 0
|
||||
moveq r1, #1
|
||||
movne r1, #0
|
||||
add r3, r11, r9 ; detok + i
|
||||
|
||||
cmp r7, r1 ; c ?= !type
|
||||
strb r7, [r3, #detok_eob] ; eob[i] = c
|
||||
|
||||
ldr r7, [sp, #l_l_ptr] ; l
|
||||
ldr r2, [sp, #l_a_ptr] ; a
|
||||
movne r3, #1 ; t
|
||||
moveq r3, #0
|
||||
|
||||
add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
|
||||
add r11, r11, #1 ; i++
|
||||
strb r3, [r7] ; *l = t
|
||||
strb r3, [r2] ; *a = t
|
||||
str r0, [sp, #l_qcoeff] ; qcoeff
|
||||
str r11, [sp, #l_i] ; i
|
||||
|
||||
cmp r11, r12 ; i < stop
|
||||
ldr r7, [sp, #l_type] ; type
|
||||
|
||||
blt BLOCK_LOOP
|
||||
|
||||
cmp r11, #25 ; i ?= 25
|
||||
bne ln2_decode_mb_to
|
||||
|
||||
ldr r12, [r9, #detok_qcoeff_start_ptr]
|
||||
ldr r10, [r9, #detok_coef_probs]
|
||||
mov r7, #0 ; type/i = 0
|
||||
mov r3, #16 ; stop = 16
|
||||
str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
|
||||
str r7, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
|
||||
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln2_decode_mb_to
|
||||
cmp r11, #16 ; i ?= 16
|
||||
bne ln1_decode_mb_to
|
||||
|
||||
mov r10, #detok_coef_probs
|
||||
add r10, r10, #2*4 ; coef_probs[type]
|
||||
ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
|
||||
|
||||
mov r7, #2 ; type = 2
|
||||
mov r3, #24 ; stop = 24
|
||||
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln1_decode_mb_to
|
||||
ldr r2, [sp, #l_bc]
|
||||
mov r0, #0
|
||||
nop
|
||||
|
||||
str r8, [r2, #bool_decoder_user_buffer]
|
||||
str r5, [r2, #bool_decoder_count]
|
||||
str r4, [r2, #bool_decoder_value]
|
||||
str r6, [r2, #bool_decoder_range]
|
||||
|
||||
add sp, sp, #l_stacksize
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP ; |vp8_decode_mb_tokens_v6|
|
||||
|
||||
END
|
@@ -1,160 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_decode_value_neon|
|
||||
EXPORT |vp8dx_start_decode_neon|
|
||||
EXPORT |vp8dx_stop_decode_neon|
|
||||
EXPORT |vp8dx_decode_bool_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
INCLUDE vpx_asm_offsets.asm
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
; int z = 0;
|
||||
; int bit;
|
||||
; for ( bit=bits-1; bit>=0; bit-- )
|
||||
; {
|
||||
; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
|
||||
; }
|
||||
; return z;
|
||||
|
||||
;int vp8_decode_value_neon ( BOOL_DECODER *br, int bits )
|
||||
|vp8_decode_value_neon| PROC
|
||||
stmdb sp!, {r4 - r6, lr}
|
||||
mov r4, r0
|
||||
mov r5, r1
|
||||
mov r6, #0
|
||||
|
||||
subs r5, r5, #1
|
||||
bmi decode_value_exit
|
||||
|
||||
decode_value_loop
|
||||
mov r1, #0x80
|
||||
mov r0, r4
|
||||
bl vp8dx_decode_bool_neon_internal ; needed for conversion to s file
|
||||
orr r6, r6, r0, lsl r5
|
||||
subs r5, r5, #1
|
||||
bpl decode_value_loop
|
||||
|
||||
decode_value_exit
|
||||
mov r0, r6
|
||||
ldmia sp!, {r4 - r6, pc}
|
||||
ENDP ; |vp8_decode_value_neon|
|
||||
|
||||
|
||||
;void vp8dx_start_decode_neon ( BOOL_DECODER *br, unsigned char *source )
|
||||
|vp8dx_start_decode_neon| PROC
|
||||
stmdb sp!, {r4 - r5, lr}
|
||||
mov r2, #0
|
||||
mov r3, #255
|
||||
|
||||
str r2, [r0, #bool_decoder_lowvalue]
|
||||
str r3, [r0, #bool_decoder_range]
|
||||
str r1, [r0, #bool_decoder_buffer]
|
||||
|
||||
mov r3, #8
|
||||
mov r2, #4
|
||||
str r3, [r0, #bool_decoder_count]
|
||||
str r2, [r0, #bool_decoder_pos]
|
||||
|
||||
ldrb r2, [r1, #3]
|
||||
ldrb r3, [r1, #2]
|
||||
ldrb r4, [r1, #1]
|
||||
ldrb r5, [r1]
|
||||
|
||||
orr r1, r2, r3, lsl #8
|
||||
orr r1, r1, r4, lsl #16
|
||||
orr r1, r1, r5, lsl #24
|
||||
|
||||
str r1, [r0, #bool_decoder_value]
|
||||
|
||||
ldmia sp!, {r4 - r5, pc}
|
||||
ENDP ; |vp8dx_start_decode_neon|
|
||||
|
||||
|
||||
;void vp8dx_stop_decode_neon ( BOOL_DECODER *bc );
|
||||
|vp8dx_stop_decode_neon| PROC
|
||||
mov pc, lr
|
||||
ENDP ; |vp8dx_stop_decode_neon|
|
||||
|
||||
|
||||
; bigsplit RN r1
|
||||
; buffer_v RN r1
|
||||
; count_v RN r4
|
||||
; range_v RN r2
|
||||
; value_v RN r3
|
||||
; pos_v RN r5
|
||||
; split RN r6
|
||||
; bit RN lr
|
||||
;int vp8dx_decode_bool_neon ( BOOL_DECODER *br, int probability )
|
||||
|vp8dx_decode_bool_neon| PROC
|
||||
vp8dx_decode_bool_neon_internal
|
||||
;LDRD and STRD doubleword data transfers must be eight-byte aligned. Use ALIGN 8
|
||||
;before memory allocation
|
||||
stmdb sp!, {r4 - r5, lr}
|
||||
|
||||
ldr r2, [r0, #bool_decoder_range] ;load range (r2), value(r3)
|
||||
ldr r3, [r0, #bool_decoder_value]
|
||||
;ldrd r2, r3, [r0, #bool_decoder_range] ;ldrd costs 2 cycles
|
||||
;
|
||||
|
||||
mov r4, r2, lsl #8
|
||||
sub r4, r4, #256
|
||||
mov r12, #1
|
||||
|
||||
smlawb r4, r4, r1, r12 ;split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
mov lr, r0
|
||||
mov r0, #0 ;bit = 0
|
||||
;
|
||||
subs r5, r3, r4, lsl #24
|
||||
|
||||
subhs r2, r2, r4 ;range = br->range-split
|
||||
movlo r2, r4 ;range = split
|
||||
movhs r0, #1 ;bit = 1
|
||||
movhs r3, r5 ;value = value-bigsplit
|
||||
|
||||
cmp r2, #0x80
|
||||
blt range_less_0x80
|
||||
strd r2, r3, [lr, #bool_decoder_range] ;store result
|
||||
|
||||
ldmia sp!, {r4 - r5, pc}
|
||||
|
||||
range_less_0x80
|
||||
|
||||
ldrd r4, r5, [lr, #bool_decoder_count] ;load count, pos, buffer
|
||||
ldr r1, [lr, #bool_decoder_buffer]
|
||||
|
||||
clz r12, r2
|
||||
add r1, r1, r5
|
||||
|
||||
sub r12, r12, #24
|
||||
subs r4, r4, r12 ;count -= shift
|
||||
mov r2, r2, lsl r12 ;range <<= shift
|
||||
mov r3, r3, lsl r12 ;value <<= shift
|
||||
addle r4, r4, #8 ;count += 8
|
||||
ldrleb r12, [r1], #1 ;br->buffer[br->pos]
|
||||
|
||||
rsble r1, r4, #8 ;-count
|
||||
addle r5, r5, #1 ;br->pos++
|
||||
orrle r3, r3, r12, lsl r1 ;value |= (br->buffer[br->pos]) << (-count)
|
||||
|
||||
strd r2, r3, [lr, #bool_decoder_range] ;store result
|
||||
strd r4, r5, [lr, #bool_decoder_count]
|
||||
|
||||
ldmia sp!, {r4 - r5, pc}
|
||||
ENDP ; |vp8dx_decode_bool_neon|
|
||||
|
||||
END
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@@ -12,13 +12,7 @@
|
||||
#include "vpx_ports/config.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#if CONFIG_VP8_ENCODER
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_DECODER
|
||||
#include "onyxd_int.h"
|
||||
#endif
|
||||
|
||||
#define DEFINE(sym, val) int sym = val;
|
||||
|
||||
@@ -31,29 +25,6 @@
|
||||
* {
|
||||
*/
|
||||
|
||||
#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
|
||||
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
|
||||
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
|
||||
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
|
||||
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
|
||||
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
|
||||
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
|
||||
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
|
||||
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
|
||||
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
|
||||
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_DECODER
|
||||
DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
|
||||
DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
|
||||
DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
|
||||
DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
|
||||
DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
|
||||
DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
|
||||
DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
|
||||
DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
|
||||
|
||||
DEFINE(detok_scan, offsetof(DETOK, scan));
|
||||
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
|
||||
DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
|
||||
@@ -77,7 +48,6 @@ DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
|
||||
|
||||
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
|
||||
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
|
||||
#endif
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
@@ -26,8 +26,9 @@ DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
|
||||
};
|
||||
|
||||
|
||||
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
|
||||
unsigned int source_sz)
|
||||
int vp8dx_start_decode(BOOL_DECODER *br,
|
||||
const unsigned char *source,
|
||||
unsigned int source_sz)
|
||||
{
|
||||
br->user_buffer_end = source+source_sz;
|
||||
br->user_buffer = source;
|
||||
@@ -39,13 +40,13 @@ int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
|
||||
return 1;
|
||||
|
||||
/* Populate the buffer */
|
||||
vp8dx_bool_decoder_fill_c(br);
|
||||
vp8dx_bool_decoder_fill(br);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
|
||||
void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
|
||||
{
|
||||
const unsigned char *bufptr;
|
||||
const unsigned char *bufend;
|
||||
@@ -62,69 +63,3 @@ void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Until optimized versions of these functions are available, we
|
||||
* keep the implementation in the header to allow inlining.
|
||||
*
|
||||
* The RTCD-style invocations are still in place so this can
|
||||
* be switched by just uncommenting these functions here and
|
||||
* the DBOOLHUFF_INVOKE calls in the header.
|
||||
*/
|
||||
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
|
||||
{
|
||||
unsigned int bit=0;
|
||||
VP8_BD_VALUE value;
|
||||
unsigned int split;
|
||||
VP8_BD_VALUE bigsplit;
|
||||
int count;
|
||||
unsigned int range;
|
||||
|
||||
value = br->value;
|
||||
count = br->count;
|
||||
range = br->range;
|
||||
|
||||
split = 1 + (((range-1) * probability) >> 8);
|
||||
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
|
||||
|
||||
range = split;
|
||||
if(value >= bigsplit)
|
||||
{
|
||||
range = br->range-split;
|
||||
value = value-bigsplit;
|
||||
bit = 1;
|
||||
}
|
||||
|
||||
/*if(range>=0x80)
|
||||
{
|
||||
br->value = value;
|
||||
br->range = range;
|
||||
return bit;
|
||||
}*/
|
||||
|
||||
{
|
||||
register unsigned int shift = vp8dx_bitreader_norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
}
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
br->range = range;
|
||||
if (count < 0)
|
||||
vp8dx_bool_decoder_fill_c(br);
|
||||
return bit;
|
||||
}
|
||||
|
||||
int vp8dx_decode_value_c(BOOL_DECODER *br, int bits)
|
||||
{
|
||||
int z = 0;
|
||||
int bit;
|
||||
for ( bit=bits-1; bit>=0; bit-- )
|
||||
{
|
||||
z |= (vp8dx_decode_bool(br, 0x80)<<bit);
|
||||
}
|
||||
return z;
|
||||
}
|
||||
#endif
|
||||
|
@@ -25,10 +25,6 @@ typedef size_t VP8_BD_VALUE;
|
||||
Even relatively modest values like 100 would work fine.*/
|
||||
# define VP8_LOTS_OF_BITS (0x40000000)
|
||||
|
||||
|
||||
|
||||
struct vp8_dboolhuff_rtcd_vtable;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const unsigned char *user_buffer_end;
|
||||
@@ -36,82 +32,15 @@ typedef struct
|
||||
VP8_BD_VALUE value;
|
||||
int count;
|
||||
unsigned int range;
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
struct vp8_dboolhuff_rtcd_vtable *rtcd;
|
||||
#endif
|
||||
} BOOL_DECODER;
|
||||
|
||||
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
|
||||
const unsigned char *source, unsigned int source_sz)
|
||||
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
|
||||
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
|
||||
#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
|
||||
|
||||
#if ARCH_ARM
|
||||
#include "arm/dboolhuff_arm.h"
|
||||
#endif
|
||||
|
||||
#ifndef vp8_dbool_start
|
||||
#define vp8_dbool_start vp8dx_start_decode_c
|
||||
#endif
|
||||
|
||||
#ifndef vp8_dbool_fill
|
||||
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
|
||||
#endif
|
||||
|
||||
#ifndef vp8_dbool_debool
|
||||
#define vp8_dbool_debool vp8dx_decode_bool_c
|
||||
#endif
|
||||
|
||||
#ifndef vp8_dbool_devalue
|
||||
#define vp8_dbool_devalue vp8dx_decode_value_c
|
||||
#endif
|
||||
|
||||
extern prototype_dbool_start(vp8_dbool_start);
|
||||
extern prototype_dbool_fill(vp8_dbool_fill);
|
||||
extern prototype_dbool_debool(vp8_dbool_debool);
|
||||
extern prototype_dbool_devalue(vp8_dbool_devalue);
|
||||
|
||||
typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
|
||||
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
|
||||
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
|
||||
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
|
||||
|
||||
typedef struct vp8_dboolhuff_rtcd_vtable {
|
||||
vp8_dbool_start_fn_t start;
|
||||
vp8_dbool_fill_fn_t fill;
|
||||
vp8_dbool_debool_fn_t debool;
|
||||
vp8_dbool_devalue_fn_t devalue;
|
||||
} vp8_dboolhuff_rtcd_vtable_t;
|
||||
|
||||
/* There are no processor-specific versions of these
|
||||
* functions right now. Disable RTCD to avoid using
|
||||
* function pointers which gives a speed boost
|
||||
*/
|
||||
/*#ifdef ENABLE_RUNTIME_CPU_DETECT
|
||||
#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
|
||||
#define IF_RTCD(x) (x)
|
||||
#else*/
|
||||
#define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
|
||||
#define IF_RTCD(x) NULL
|
||||
/*#endif*/
|
||||
|
||||
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
|
||||
/* wrapper functions to hide RTCD. static means inline means hopefully no
|
||||
* penalty
|
||||
*/
|
||||
static int vp8dx_start_decode(BOOL_DECODER *br,
|
||||
struct vp8_dboolhuff_rtcd_vtable *rtcd,
|
||||
const unsigned char *source, unsigned int source_sz) {
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
br->rtcd = rtcd;
|
||||
#endif
|
||||
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
|
||||
}
|
||||
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
|
||||
DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
|
||||
}
|
||||
int vp8dx_start_decode(BOOL_DECODER *br,
|
||||
const unsigned char *source,
|
||||
unsigned int source_sz);
|
||||
|
||||
void vp8dx_bool_decoder_fill(BOOL_DECODER *br);
|
||||
|
||||
/*The refill loop is used in several places, so define it in a macro to make
|
||||
sure they're all consistent.
|
||||
@@ -138,12 +67,6 @@ static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
|
||||
|
||||
|
||||
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
||||
/*
|
||||
* Until optimized versions of this function are available, we
|
||||
* keep the implementation in the header to allow inlining.
|
||||
*
|
||||
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
|
||||
*/
|
||||
unsigned int bit = 0;
|
||||
VP8_BD_VALUE value;
|
||||
unsigned int split;
|
||||
@@ -167,13 +90,6 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
||||
bit = 1;
|
||||
}
|
||||
|
||||
/*if(range>=0x80)
|
||||
{
|
||||
br->value = value;
|
||||
br->range = range;
|
||||
return bit
|
||||
}*/
|
||||
|
||||
{
|
||||
register unsigned int shift = vp8dx_bitreader_norm[range];
|
||||
range <<= shift;
|
||||
@@ -190,12 +106,6 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
||||
|
||||
static int vp8_decode_value(BOOL_DECODER *br, int bits)
|
||||
{
|
||||
/*
|
||||
* Until optimized versions of this function are available, we
|
||||
* keep the implementation in the header to allow inlining.
|
||||
*
|
||||
*return DBOOLHUFF_INVOKE(br->rtcd, devalue)(br, bits);
|
||||
*/
|
||||
int z = 0;
|
||||
int bit;
|
||||
|
||||
|
@@ -475,8 +475,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
|
||||
"Truncated packet or corrupt partition "
|
||||
"%d length", i + 1);
|
||||
|
||||
if (vp8dx_start_decode(bool_decoder, IF_RTCD(&pbi->dboolhuff),
|
||||
partition, partition_size))
|
||||
if (vp8dx_start_decode(bool_decoder, partition, partition_size))
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate bool decoder %d", i + 1);
|
||||
|
||||
@@ -485,9 +484,11 @@ static void setup_token_decoder(VP8D_COMP *pbi,
|
||||
bool_decoder++;
|
||||
}
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* Clamp number of decoder threads */
|
||||
if (pbi->decoding_thread_count > num_part - 1)
|
||||
pbi->decoding_thread_count = num_part - 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -651,8 +652,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
|
||||
init_frame(pbi);
|
||||
|
||||
if (vp8dx_start_decode(bc, IF_RTCD(&pbi->dboolhuff),
|
||||
data, data_end - data))
|
||||
if (vp8dx_start_decode(bc, data, data_end - data))
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate bool decoder 0");
|
||||
if (pc->frame_type == KEY_FRAME) {
|
||||
@@ -846,7 +846,9 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
|
||||
|
||||
/* set up frame new frame for intra coded blocks */
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
|
||||
#endif
|
||||
vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
|
||||
|
||||
vp8_setup_block_dptrs(xd);
|
||||
@@ -866,6 +868,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
|
||||
vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
|
||||
{
|
||||
vp8mt_decode_mb_rows(pbi, xd);
|
||||
@@ -880,6 +883,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
int ibc = 0;
|
||||
int num_part = 1 << pc->multi_token_partition;
|
||||
|
@@ -74,37 +74,6 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
|
||||
* for the assembly version.
|
||||
*/
|
||||
DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
|
||||
{
|
||||
/* vp8_block2left */
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
/* vp8_block2above */
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
};
|
||||
|
||||
void vp8_init_detokenizer(VP8D_COMP *dx)
|
||||
{
|
||||
const VP8_COMMON *const oc = & dx->common;
|
||||
MACROBLOCKD *x = & dx->mb;
|
||||
|
||||
dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
|
||||
dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
|
||||
dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
|
||||
dx->detoken.scan = vp8_default_zig_zag1d;
|
||||
dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
|
||||
dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
|
||||
|
||||
dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
|
||||
dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
|
||||
dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
|
||||
dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
#define FILL \
|
||||
if(count < 0) \
|
||||
@@ -202,35 +171,6 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
}\
|
||||
NORMALIZE
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
|
||||
{
|
||||
int eobtotal = 0;
|
||||
int i, type;
|
||||
|
||||
dx->detoken.current_bc = x->current_bc;
|
||||
dx->detoken.A = x->above_context;
|
||||
dx->detoken.L = x->left_context;
|
||||
|
||||
type = 3;
|
||||
|
||||
if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
type = 1;
|
||||
eobtotal -= 16;
|
||||
}
|
||||
|
||||
vp8_decode_mb_tokens_v6(&dx->detoken, type);
|
||||
|
||||
for (i = 0; i < 25; i++)
|
||||
{
|
||||
x->eobs[i] = dx->detoken.eob[i];
|
||||
eobtotal += dx->detoken.eob[i];
|
||||
}
|
||||
|
||||
return eobtotal;
|
||||
}
|
||||
#else
|
||||
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
|
||||
{
|
||||
ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
|
||||
@@ -423,4 +363,3 @@ BLOCK_FINISHED:
|
||||
return eobtotal;
|
||||
|
||||
}
|
||||
#endif /*!CONFIG_ASM_DETOK*/
|
||||
|
@@ -14,10 +14,6 @@
|
||||
|
||||
#include "onyxd_int.h"
|
||||
|
||||
#if ARCH_ARM
|
||||
#include "arm/detokenize_arm.h"
|
||||
#endif
|
||||
|
||||
void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
|
||||
int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
|
||||
|
||||
|
@@ -27,12 +27,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
|
||||
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
|
||||
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
|
||||
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
|
||||
pbi->dboolhuff.start = vp8dx_start_decode_c;
|
||||
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
|
||||
#if 0 /*For use with RTCD, when implemented*/
|
||||
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
|
||||
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
|
@@ -114,8 +114,10 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
|
||||
pbi->ready_for_new_data = 1;
|
||||
|
||||
pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
|
||||
#if CONFIG_MULTITHREAD
|
||||
pbi->max_threads = oxcf->max_threads;
|
||||
vp8_decoder_create_threads(pbi);
|
||||
#endif
|
||||
|
||||
/* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
|
||||
* unnecessary calling of vp8cx_init_de_quantizer() for every frame.
|
||||
@@ -131,9 +133,6 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
vp8_init_detokenizer(pbi);
|
||||
#endif
|
||||
pbi->common.error.setjmp = 0;
|
||||
return (VP8D_PTR) pbi;
|
||||
}
|
||||
@@ -149,8 +148,8 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (pbi->b_multithreaded_rd)
|
||||
vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
|
||||
#endif
|
||||
vp8_decoder_remove_threads(pbi);
|
||||
#endif
|
||||
vp8_remove_common(&pbi->common);
|
||||
vpx_free(pbi);
|
||||
}
|
||||
@@ -407,6 +406,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
return retcode;
|
||||
}
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
|
||||
{
|
||||
if (swap_frame_buffers (cm))
|
||||
@@ -424,6 +424,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
return -1;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
if (swap_frame_buffers (cm))
|
||||
{
|
||||
|
@@ -87,14 +87,15 @@ typedef struct VP8Decompressor
|
||||
unsigned int time_decoding;
|
||||
unsigned int time_loop_filtering;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* variable for threading */
|
||||
|
||||
volatile int b_multithreaded_rd;
|
||||
int max_threads;
|
||||
int current_mb_col_main;
|
||||
int decoding_thread_count;
|
||||
int allocated_decoding_thread_count;
|
||||
|
||||
/* variable for threading */
|
||||
#if CONFIG_MULTITHREAD
|
||||
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int sync_range;
|
||||
int *mt_current_mb_col; /* Each row remembers its already decoded column. */
|
||||
@@ -125,7 +126,6 @@ typedef struct VP8Decompressor
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
vp8_dequant_rtcd_vtable_t dequant;
|
||||
struct vp8_dboolhuff_rtcd_vtable dboolhuff;
|
||||
#endif
|
||||
|
||||
|
||||
|
@@ -21,7 +21,6 @@
|
||||
|
||||
void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */
|
||||
unsigned char *yleft_col;
|
||||
unsigned char yleft_buf[16];
|
||||
@@ -146,17 +145,10 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
|
||||
case MB_MODE_COUNT:
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) x;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
#endif
|
||||
}
|
||||
|
||||
void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */
|
||||
unsigned char *yleft_col;
|
||||
unsigned char yleft_buf[16];
|
||||
@@ -289,17 +281,10 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
|
||||
case MB_MODE_COUNT:
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) x;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
#endif
|
||||
}
|
||||
|
||||
void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */
|
||||
unsigned char *uleft_col; /*[16];*/
|
||||
unsigned char uleft_buf[8];
|
||||
@@ -452,17 +437,10 @@ void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_ro
|
||||
case MB_MODE_COUNT:
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) x;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
#endif
|
||||
}
|
||||
|
||||
void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */
|
||||
unsigned char *uleft_col; /*[16];*/
|
||||
unsigned char uleft_buf[8];
|
||||
@@ -621,12 +599,6 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
|
||||
case MB_MODE_COUNT:
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) x;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -638,7 +610,6 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
|
||||
int mb_col,
|
||||
int num)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int i, r, c;
|
||||
|
||||
unsigned char *Above; /* = *(x->base_dst) + x->dst - x->dst_stride; */
|
||||
@@ -935,15 +906,6 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
|
||||
|
||||
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) xd;
|
||||
(void) b_mode;
|
||||
(void) predictor;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
(void) num;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
|
||||
@@ -951,7 +913,6 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
|
||||
*/
|
||||
void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
unsigned char *above_right; /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */
|
||||
unsigned int *src_ptr;
|
||||
unsigned int *dst_ptr0;
|
||||
@@ -973,10 +934,4 @@ void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
|
||||
*dst_ptr0 = *src_ptr;
|
||||
*dst_ptr1 = *src_ptr;
|
||||
*dst_ptr2 = *src_ptr;
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) x;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
#endif
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifndef WIN32
|
||||
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
@@ -38,7 +38,6 @@ extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
|
||||
|
||||
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
int i, j;
|
||||
|
||||
@@ -88,18 +87,11 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
|
||||
|
||||
for (i=0; i< pc->mb_rows; i++)
|
||||
pbi->mt_current_mb_col[i]=-1;
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) xd;
|
||||
(void) mbrd;
|
||||
(void) count;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int eobtotal = 0;
|
||||
int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
VP8_COMMON *pc = &pbi->common;
|
||||
@@ -222,18 +214,11 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
|
||||
(xd->qcoeff+16*16, xd->block[16].dequant,
|
||||
xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.uv_stride, xd->eobs+16);
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) xd;
|
||||
(void) mb_row;
|
||||
(void) mb_col;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
|
||||
VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
|
||||
MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
|
||||
@@ -320,7 +305,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
vp8_adjust_mb_lf_value(xd, &filter_level);
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
}
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
@@ -438,9 +423,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
sem_post(&pbi->h_event_end_decoding);
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void) p_data;
|
||||
#endif
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
@@ -448,7 +430,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
|
||||
void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int core_count = 0;
|
||||
int ithread;
|
||||
|
||||
@@ -482,16 +463,11 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
||||
|
||||
pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
|
||||
}
|
||||
|
||||
#else
|
||||
(void) pbi;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
int i;
|
||||
|
||||
@@ -589,15 +565,11 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
|
||||
pbi->mt_vleft_col = NULL ;
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
int i;
|
||||
int uv_width;
|
||||
@@ -646,17 +618,11 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
for (i=0; i< pc->mb_rows; i++)
|
||||
CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) width;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
|
||||
/* shutdown MB Decoding thread; */
|
||||
if (pbi->b_multithreaded_rd)
|
||||
{
|
||||
@@ -702,15 +668,11 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
pbi->de_thread_data = NULL;
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void) pbi;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
VP8_COMMON *cm = &pbi->common;
|
||||
MACROBLOCKD *mbd = &pbi->mb;
|
||||
/*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
|
||||
@@ -752,16 +714,11 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) default_filt_lvl;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int mb_row;
|
||||
VP8_COMMON *pc = &pbi->common;
|
||||
|
||||
@@ -863,7 +820,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
vp8_adjust_mb_lf_value(xd, &filter_level);
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
}
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
@@ -981,8 +938,4 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
}
|
||||
|
||||
sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) xd;
|
||||
#endif
|
||||
}
|
||||
|
@@ -38,14 +38,14 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
/*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
|
||||
cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
|
||||
cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
|
||||
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
|
||||
cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/
|
||||
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;*/
|
||||
cpi->rtcd.variance.var16x16 = vp8_variance16x16_armv6;
|
||||
|
||||
/*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
|
||||
cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
|
||||
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6;
|
||||
|
||||
/*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
|
||||
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
|
||||
|
@@ -14,7 +14,7 @@
|
||||
EXPORT |vp8_stop_encode|
|
||||
EXPORT |vp8_encode_value|
|
||||
|
||||
INCLUDE vpx_vp8_enc_asm_offsets.asm
|
||||
INCLUDE asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
|
@@ -11,7 +11,7 @@
|
||||
|
||||
EXPORT |vp8cx_pack_tokens_armv5|
|
||||
|
||||
INCLUDE vpx_vp8_enc_asm_offsets.asm
|
||||
INCLUDE asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
|
@@ -11,7 +11,7 @@
|
||||
|
||||
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
|
||||
|
||||
INCLUDE vpx_vp8_enc_asm_offsets.asm
|
||||
INCLUDE asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
|
@@ -11,7 +11,7 @@
|
||||
|
||||
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
|
||||
|
||||
INCLUDE vpx_vp8_enc_asm_offsets.asm
|
||||
INCLUDE asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
@@ -65,6 +65,8 @@
|
||||
numparts_loop
|
||||
ldr r10, [sp, #40] ; ptr
|
||||
ldr r5, [sp, #36] ; move mb_rows to the counting section
|
||||
sub r5, r5, r11 ; move start point with each partition
|
||||
; mb_rows starts at i
|
||||
str r5, [sp, #12]
|
||||
|
||||
; Reset all of the VP8 Writer data for each partition that
|
||||
|
147
vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
Normal file
147
vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm
Normal file
@@ -0,0 +1,147 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance16x16_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
; Computes the variance of a 16x16 block of src against ref.
;
; Register use inside the routine:
;   r8  - running signed sum of (src - ref) over all 256 pixels
;   r11 - running sum of squared differences (sse)
;   r12 - remaining row count
;   lr  - constant zero (used as the "other" operand of sel/usad8)
;
; On return *sse holds the sum of squared differences and r0 holds
; sse - ((sum * sum) >> 8).
;
; |sum| can reach 16 * 16 * 255 = 65280, so sum * sum can reach
; 4261478400, which is above 2^31 but below 2^32. The product therefore
; has to be shifted as an unsigned value (LSR); an arithmetic shift would
; sign-extend it and corrupt the result for large sums.
|vp8_variance16x16_armv6| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers pushed -> stack args start at sp + 0x28
    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     r8, #0              ; initialize sum = 0
    mov     r11, #0             ; initialize sse = 0

loop
    ; 1st 4 pixels
    ldr     r4, [r0, #0x0]      ; load 4 src pixels
    ldr     r5, [r2, #0x0]      ; load 4 ref pixels

    mov     lr, #0              ; constant zero

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r9, r5, r4          ; calculate difference with reversed operands
    sel     r6, r9, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; differences of all 4 pixels
    ; calculate total sum
    adds    r8, r8, r4          ; add positive differences to sum
    subs    r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 2nd 4 pixels
    ldr     r4, [r0, #0x4]      ; load 4 src pixels
    ldr     r5, [r2, #0x4]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r9, r5, r4          ; calculate difference with reversed operands
    sel     r6, r9, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 3rd 4 pixels
    ldr     r4, [r0, #0x8]      ; load 4 src pixels
    ldr     r5, [r2, #0x8]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r9, r5, r4          ; calculate difference with reversed operands
    sel     r6, r9, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 4th 4 pixels
    ldr     r4, [r0, #0xc]      ; load 4 src pixels
    ldr     r5, [r2, #0xc]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r9, r5, r4          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r6, r9, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r10, r6, ror #8     ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    smlad   r11, r10, r10, r11  ; dual signed multiply, add and accumulate (2)


    subs    r12, r12, #1        ; one row done; Z flag drives the branch below

    bne     loop

    ; return stuff
    ldr     r6, [sp, #0x28]     ; get address of sse
    mul     r0, r8, r8          ; sum * sum (low 32 bits; exact as unsigned)
    str     r11, [r6]           ; store sse
    sub     r0, r11, r0, LSR #8 ; return (sse - ((sum * sum) >> 8)), unsigned shift

    ldmfd   sp!, {r4-r12, pc}

    ENDP
|
||||
|
||||
END
|
@@ -112,10 +112,7 @@
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA fastfdct_dat, DATA, READONLY
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_ffdct_coeff_
|
||||
DCD ffdct_coeff
|
||||
ffdct_coeff
|
||||
|
@@ -165,10 +165,7 @@
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA fastfdct8x4_dat, DATA, READONLY
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_ffdct8_coeff_
|
||||
DCD ffdct8_coeff
|
||||
ffdct8_coeff
|
||||
|
@@ -122,10 +122,7 @@
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA dct4x4_dat, DATA, READONLY
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_dct_matrix_
|
||||
DCD dct_matrix
|
||||
dct_matrix
|
||||
|
@@ -9,7 +9,7 @@
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sub_pixel_variance16x16_neon|
|
||||
EXPORT |vp8_sub_pixel_variance16x16_neon_func|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@@ -24,7 +24,7 @@
|
||||
; stack(r6) unsigned int *sse
|
||||
;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.
|
||||
|
||||
|vp8_sub_pixel_variance16x16_neon| PROC
|
||||
|vp8_sub_pixel_variance16x16_neon_func| PROC
|
||||
push {r4-r6, lr}
|
||||
|
||||
ldr r12, _BilinearTaps_coeff_
|
||||
@@ -416,10 +416,7 @@ sub_pixel_variance16x16_neon_loop
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA vp8e_bilinear_taps_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_BilinearTaps_coeff_
|
||||
DCD bilinear_taps_coeff
|
||||
bilinear_taps_coeff
|
||||
|
@@ -215,10 +215,7 @@ sub_pixel_variance8x8_neon_loop
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bilinear_taps_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
_BilinearTaps_coeff_
|
||||
DCD bilinear_taps_coeff
|
||||
bilinear_taps_coeff
|
||||
|
71
vp8/encoder/arm/variance_arm.c
Normal file
71
vp8/encoder/arm/variance_arm.c
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "variance.h"
|
||||
#include "filter.h"
|
||||
#include "arm/bilinearfilter_arm.h"
|
||||
|
||||
#if HAVE_ARMV6
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
unsigned short first_pass[36*16];
|
||||
unsigned char second_pass[20*16];
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,
|
||||
src_pixels_per_line,
|
||||
17, 16, HFilter);
|
||||
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
|
||||
16, 16, 16, VFilter);
|
||||
|
||||
return vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
|
||||
/*
 * NEON sub-pixel variance of a 16x16 block: dispatch wrapper.
 *
 * Three offset combinations are routed to dedicated routines, which take
 * no offset arguments:
 *   xoffset == 4, yoffset == 0  ->  vp8_variance_halfpixvar16x16_h_neon
 *   xoffset == 0, yoffset == 4  ->  vp8_variance_halfpixvar16x16_v_neon
 *   xoffset == 4, yoffset == 4  ->  vp8_variance_halfpixvar16x16_hv_neon
 * Every other combination goes to vp8_sub_pixel_variance16x16_neon_func
 * with the offsets passed through. The callee's return value is returned
 * unchanged.
 */
unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const unsigned char *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse)
{
    if (xoffset == 4)
    {
        if (yoffset == 0)
            return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line,
                                                       dst_ptr, dst_pixels_per_line, sse);

        if (yoffset == 4)
            return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line,
                                                        dst_ptr, dst_pixels_per_line, sse);
    }
    else if (xoffset == 0 && yoffset == 4)
    {
        return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line,
                                                   dst_ptr, dst_pixels_per_line, sse);
    }

    /* General case: full bilinear sub-pixel routine. */
    return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line,
                                                 xoffset, yoffset,
                                                 dst_ptr, dst_pixels_per_line, sse);
}
|
||||
|
||||
#endif
|
@@ -12,6 +12,23 @@
|
||||
#ifndef VARIANCE_ARM_H
|
||||
#define VARIANCE_ARM_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
|
||||
extern prototype_variance(vp8_variance16x16_armv6);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
#undef vp8_variance_subpixvar16x16
|
||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6
|
||||
|
||||
#undef vp8_variance_var16x16
|
||||
#define vp8_variance_var16x16 vp8_variance16x16_armv6
|
||||
|
||||
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_sad(vp8_sad4x4_neon);
|
||||
extern prototype_sad(vp8_sad8x8_neon);
|
||||
@@ -30,6 +47,7 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
|
||||
//extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
|
||||
//extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon_func);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@@ -12,9 +12,9 @@
|
||||
#include "vpx_ports/config.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../treewriter.h"
|
||||
#include "../tokenize.h"
|
||||
#include "../onyx_int.h"
|
||||
#include "treewriter.h"
|
||||
#include "tokenize.h"
|
||||
#include "onyx_int.h"
|
||||
|
||||
#define ct_assert(name,cond) \
|
||||
static void assert_##name(void) UNUSED;\
|
||||
@@ -31,6 +31,7 @@
|
||||
* {
|
||||
*/
|
||||
|
||||
//pack tokens
|
||||
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
|
||||
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
|
||||
DEFINE(vp8_writer_value, offsetof(vp8_writer, value));
|
||||
@@ -40,19 +41,19 @@ DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
|
||||
|
||||
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
|
||||
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
|
||||
DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
|
||||
DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
|
||||
DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node));
|
||||
DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
|
||||
DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
|
||||
|
||||
DEFINE(vp8_token_value, offsetof(vp8_token, value));
|
||||
DEFINE(vp8_token_len, offsetof(vp8_token, Len));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
|
||||
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
|
||||
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
|
||||
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
|
||||
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
|
||||
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
|
||||
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
|
||||
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
|
||||
|
||||
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
|
||||
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
|
||||
@@ -62,12 +63,14 @@ DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
|
||||
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
|
||||
DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
|
||||
|
||||
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
// These two sizes are used in vp7cx_pack_tokens. They are hard coded
|
||||
// so if the size changes this will have to be adjusted.
|
||||
// These two sizes are used in vp8cx_pack_tokens. They are hard coded
|
||||
// so if the size changes this will have to be adjusted.
|
||||
#if HAVE_ARMV5TE
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
|
||||
#endif
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
@@ -1654,10 +1654,12 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
|
||||
{
|
||||
vp8_start_encode(&cpi->bc2, cx_data + bc->pos);
|
||||
|
||||
if (!cpi->b_multi_threaded)
|
||||
pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count);
|
||||
else
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (cpi->b_multi_threaded)
|
||||
pack_mb_row_tokens(cpi, &cpi->bc2);
|
||||
else
|
||||
#endif
|
||||
pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count);
|
||||
|
||||
vp8_stop_encode(&cpi->bc2);
|
||||
oh.first_partition_length_in_bytes = cpi->bc.pos ;
|
||||
|
@@ -112,6 +112,7 @@ typedef struct
|
||||
|
||||
unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
|
||||
int optimize;
|
||||
int q_index;
|
||||
|
||||
void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
|
||||
void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
|
||||
|
@@ -365,6 +365,33 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
|
||||
x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
|
||||
x->block[24].zbin_extra = (short)zbin_extra;
|
||||
|
||||
/* save this macroblock QIndex for vp8_update_zbin_extra() */
|
||||
x->q_index = QIndex;
|
||||
}
|
||||
/*
 * Recompute the zbin_extra value of every block of macroblock x.
 *
 * The quantizer index is read from x->q_index. For each plane the value
 * is (dequant[q_index][1] * boost) >> 7, truncated to short, where:
 *   blocks  0..15 (Y)  use Y1dequant and zbin_over_quant + zbin_mode_boost
 *   blocks 16..23 (UV) use UVdequant and zbin_over_quant + zbin_mode_boost
 *   block  24     (Y2) uses Y2dequant and zbin_over_quant / 2 + zbin_mode_boost
 * Only the zbin_extra fields of x->block[] are written.
 */
void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
{
    int blk;
    int extra;
    int q = x->q_index;

    /* Luma blocks */
    extra = (cpi->common.Y1dequant[q][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (blk = 0; blk < 16; blk++)
        x->block[blk].zbin_extra = (short)extra;

    /* Chroma blocks */
    extra = (cpi->common.UVdequant[q][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (blk = 16; blk < 24; blk++)
        x->block[blk].zbin_extra = (short)extra;

    /* Second-order luma block: only half of zbin_over_quant is applied */
    extra = (cpi->common.Y2dequant[q][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].zbin_extra = (short)extra;
}
|
||||
|
||||
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
|
||||
@@ -372,13 +399,6 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
|
||||
// Clear Zbin mode boost for default case
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
// vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
|
||||
// when these values are not all zero.
|
||||
if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
|
||||
{
|
||||
vp8cx_init_quantizer(cpi);
|
||||
}
|
||||
|
||||
// MB level quantizer setup
|
||||
vp8cx_mb_init_quantizer(cpi, &cpi->mb);
|
||||
}
|
||||
@@ -460,6 +480,16 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
|
||||
int seg_map_index = (mb_row * cpi->common.mb_cols);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
const int nsync = cpi->mt_sync_range;
|
||||
const int rightmost_col = cm->mb_cols - 1;
|
||||
volatile const int *last_row_current_mb_col;
|
||||
|
||||
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
|
||||
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
|
||||
else
|
||||
last_row_current_mb_col = &rightmost_col;
|
||||
#endif
|
||||
|
||||
// reset above block coeffs
|
||||
xd->above_context = cm->above_context;
|
||||
@@ -505,6 +535,21 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
|
||||
{
|
||||
if ((mb_col & (nsync - 1)) == 0)
|
||||
{
|
||||
while (mb_col > (*last_row_current_mb_col - nsync)
|
||||
&& (*last_row_current_mb_col) != (cm->mb_cols - 1))
|
||||
{
|
||||
x86_pause_hint();
|
||||
thread_sleep(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
@@ -601,7 +646,12 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
x->partition_info++;
|
||||
|
||||
xd->above_context++;
|
||||
cpi->current_mb_col_main = mb_col;
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (cpi->b_multi_threaded != 0)
|
||||
{
|
||||
cpi->mt_current_mb_col[mb_row] = mb_col;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//extend the recon for intra prediction
|
||||
@@ -615,12 +665,15 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
|
||||
{
|
||||
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void vp8_encode_frame(VP8_COMP *cpi)
|
||||
{
|
||||
int mb_row;
|
||||
@@ -747,7 +800,76 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
struct vpx_usec_timer emr_timer;
|
||||
vpx_usec_timer_start(&emr_timer);
|
||||
|
||||
if (!cpi->b_multi_threaded)
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (cpi->b_multi_threaded)
|
||||
{
|
||||
int i;
|
||||
|
||||
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
|
||||
|
||||
for (i = 0; i < cm->mb_rows; i++)
|
||||
cpi->mt_current_mb_col[i] = 0;
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
sem_post(&cpi->h_event_start_encoding[i]);
|
||||
}
|
||||
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
|
||||
{
|
||||
vp8_zero(cm->left_context)
|
||||
|
||||
tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
|
||||
|
||||
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
|
||||
|
||||
// adjust to the next row of mbs
|
||||
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
|
||||
xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
|
||||
}
|
||||
|
||||
sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */
|
||||
|
||||
cpi->tok_count = 0;
|
||||
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
|
||||
{
|
||||
cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
|
||||
}
|
||||
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
totalrate += cpi->mb_row_ei[i].totalrate;
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
// for each macroblock row in image
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
@@ -765,100 +887,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
|
||||
cpi->tok_count = tp - cpi->tok;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int i;
|
||||
|
||||
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
|
||||
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
|
||||
{
|
||||
cpi->current_mb_col_main = -1;
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
if ((mb_row + i + 1) >= cm->mb_rows)
|
||||
break;
|
||||
|
||||
cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
|
||||
cpi->mb_row_ei[i].tp = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
|
||||
cpi->mb_row_ei[i].current_mb_col = -1;
|
||||
//SetEvent(cpi->h_event_mbrencoding[i]);
|
||||
sem_post(&cpi->h_event_mbrencoding[i]);
|
||||
}
|
||||
|
||||
vp8_zero(cm->left_context)
|
||||
|
||||
tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
|
||||
|
||||
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
|
||||
|
||||
// adjust to the next row of mbs
|
||||
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
|
||||
xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
|
||||
if (mb_row < cm->mb_rows - 1)
|
||||
//WaitForSingleObject(cpi->h_event_main, INFINITE);
|
||||
sem_wait(&cpi->h_event_main);
|
||||
}
|
||||
|
||||
/*
|
||||
for( ;mb_row<cm->mb_rows; mb_row ++)
|
||||
{
|
||||
vp8_zero( cm->left_context)
|
||||
|
||||
tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
|
||||
|
||||
encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
|
||||
// adjust to the next row of mbs
|
||||
x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
|
||||
x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
|
||||
|
||||
}
|
||||
*/
|
||||
cpi->tok_count = 0;
|
||||
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
|
||||
{
|
||||
cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
|
||||
}
|
||||
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
|
||||
int i, j;
|
||||
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
totalrate += cpi->mb_row_ei[i].totalrate;
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
vpx_usec_timer_mark(&emr_timer);
|
||||
@@ -1120,77 +1148,41 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
|
||||
{
|
||||
int Error4x4, Error16x16, error_uv;
|
||||
B_PREDICTION_MODE intra_bmodes[16];
|
||||
int rate4x4, rate16x16, rateuv;
|
||||
int dist4x4, dist16x16, distuv;
|
||||
int rate = 0;
|
||||
int rate4x4_tokenonly = 0;
|
||||
int rate16x16_tokenonly = 0;
|
||||
int rateuv_tokenonly = 0;
|
||||
int i;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
|
||||
if (cpi->sf.RD || cpi->compressor_speed != 2)
|
||||
if (cpi->sf.RD && cpi->compressor_speed != 2)
|
||||
{
|
||||
Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);
|
||||
|
||||
//save the b modes for possible later use
|
||||
for (i = 0; i < 16; i++)
|
||||
intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;
|
||||
error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
|
||||
rate += rateuv;
|
||||
|
||||
Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
|
||||
|
||||
error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
|
||||
Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);
|
||||
|
||||
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
|
||||
rate += rateuv;
|
||||
|
||||
if (Error4x4 < Error16x16)
|
||||
{
|
||||
rate += rate4x4;
|
||||
x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
|
||||
|
||||
// get back the intra block modes
|
||||
for (i = 0; i < 16; i++)
|
||||
x->e_mbd.block[i].bmi.mode = intra_bmodes[i];
|
||||
|
||||
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
|
||||
cpi->prediction_error += Error4x4 ;
|
||||
#if 0
|
||||
// Experimental RD code
|
||||
cpi->frame_distortion += dist4x4;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
|
||||
rate += rate16x16;
|
||||
|
||||
#if 0
|
||||
// Experimental RD code
|
||||
cpi->prediction_error += Error16x16;
|
||||
cpi->frame_distortion += dist16x16;
|
||||
#endif
|
||||
}
|
||||
|
||||
sum_intra_stats(cpi, x);
|
||||
|
||||
vp8_tokenize_mb(cpi, &x->e_mbd, t);
|
||||
rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
|
||||
int rate2, distortion2;
|
||||
int rate2, best_distortion;
|
||||
MB_PREDICTION_MODE mode, best_mode = DC_PRED;
|
||||
int this_rd;
|
||||
Error16x16 = INT_MAX;
|
||||
|
||||
vp8_pick_intra_mbuv_mode(x);
|
||||
|
||||
for (mode = DC_PRED; mode <= TM_PRED; mode ++)
|
||||
{
|
||||
int distortion2;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
|
||||
@@ -1201,35 +1193,28 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
|
||||
{
|
||||
Error16x16 = this_rd;
|
||||
best_mode = mode;
|
||||
best_distortion = distortion2;
|
||||
}
|
||||
}
|
||||
x->e_mbd.mode_info_context->mbmi.mode = best_mode;
|
||||
|
||||
vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);
|
||||
|
||||
if (distortion2 == INT_MAX)
|
||||
Error4x4 = INT_MAX;
|
||||
else
|
||||
Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
|
||||
|
||||
if (Error4x4 < Error16x16)
|
||||
{
|
||||
x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
|
||||
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
|
||||
cpi->prediction_error += Error4x4;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->e_mbd.mode_info_context->mbmi.mode = best_mode;
|
||||
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
|
||||
cpi->prediction_error += Error16x16;
|
||||
}
|
||||
|
||||
vp8_pick_intra_mbuv_mode(x);
|
||||
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
|
||||
sum_intra_stats(cpi, x);
|
||||
vp8_tokenize_mb(cpi, &x->e_mbd, t);
|
||||
Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
|
||||
}
|
||||
|
||||
if (Error4x4 < Error16x16)
|
||||
{
|
||||
x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
|
||||
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
|
||||
}
|
||||
|
||||
vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
|
||||
sum_intra_stats(cpi, x);
|
||||
vp8_tokenize_mb(cpi, &x->e_mbd, t);
|
||||
|
||||
return rate;
|
||||
}
|
||||
#ifdef SPEEDSTATS
|
||||
@@ -1261,10 +1246,17 @@ int vp8cx_encode_inter_macroblock
|
||||
|
||||
if (cpi->sf.RD)
|
||||
{
|
||||
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
|
||||
|
||||
/* Are we using the fast quantizer for the mode selection? */
|
||||
if(cpi->sf.use_fastquant_for_pick)
|
||||
{
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
|
||||
|
||||
/* the fast quantizer does not use zbin_extra, so
|
||||
* do not recalculate */
|
||||
cpi->zbin_mode_boost_enabled = 0;
|
||||
}
|
||||
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
|
||||
|
||||
/* switch back to the regular quantizer for the encode */
|
||||
@@ -1273,6 +1265,9 @@ int vp8cx_encode_inter_macroblock
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
|
||||
}
|
||||
|
||||
/* restore cpi->zbin_mode_boost_enabled */
|
||||
cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
|
||||
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -1289,7 +1284,7 @@ int vp8cx_encode_inter_macroblock
|
||||
#endif
|
||||
|
||||
// MB level adjutment to quantizer setup
|
||||
if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
// If cyclic update enabled
|
||||
if (cpi->cyclic_refresh_mode_enabled)
|
||||
@@ -1299,9 +1294,14 @@ int vp8cx_encode_inter_macroblock
|
||||
((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
|
||||
{
|
||||
xd->mode_info_context->mbmi.segment_id = 0;
|
||||
|
||||
/* segment_id changed, so update */
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
|
||||
if (cpi->zbin_mode_boost_enabled)
|
||||
{
|
||||
@@ -1325,7 +1325,7 @@ int vp8cx_encode_inter_macroblock
|
||||
else
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
vp8_update_zbin_extra(cpi, x);
|
||||
}
|
||||
|
||||
cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
|
||||
|
@@ -58,21 +58,6 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK
|
||||
RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
}
|
||||
|
||||
void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode)
|
||||
{
|
||||
vp8_predict_intra4x4(b, best_mode, b->predictor);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
|
||||
|
||||
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
|
||||
|
||||
x->quantize_b(be, b);
|
||||
|
||||
IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
|
||||
|
||||
RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
}
|
||||
|
||||
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
|
||||
{
|
||||
int i;
|
||||
@@ -144,51 +129,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
int b;
|
||||
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
|
||||
|
||||
vp8_transform_intra_mby(x);
|
||||
|
||||
vp8_quantize_mby(x);
|
||||
|
||||
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
|
||||
|
||||
RECON_INVOKE(&rtcd->common->recon, recon_mby)
|
||||
(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
|
||||
|
||||
// make sure block modes are set the way we want them for context updates
|
||||
for (b = 0; b < 16; b++)
|
||||
{
|
||||
BLOCKD *d = &x->e_mbd.block[b];
|
||||
|
||||
switch (x->e_mbd.mode_info_context->mbmi.mode)
|
||||
{
|
||||
|
||||
case DC_PRED:
|
||||
d->bmi.mode = B_DC_PRED;
|
||||
break;
|
||||
case V_PRED:
|
||||
d->bmi.mode = B_VE_PRED;
|
||||
break;
|
||||
case H_PRED:
|
||||
d->bmi.mode = B_HE_PRED;
|
||||
break;
|
||||
case TM_PRED:
|
||||
d->bmi.mode = B_TM_PRED;
|
||||
break;
|
||||
default:
|
||||
d->bmi.mode = B_DC_PRED;
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv(&x->e_mbd);
|
||||
@@ -213,17 +153,3 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
|
||||
}
|
||||
|
||||
void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv(&x->e_mbd);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
|
||||
|
||||
vp8_transform_mbuv(x);
|
||||
|
||||
vp8_quantize_mbuv(x);
|
||||
|
||||
vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
|
||||
|
||||
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
|
||||
}
|
||||
|
@@ -19,7 +19,5 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
|
||||
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
|
||||
void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode);
|
||||
void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
|
||||
void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
|
||||
void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
|
||||
|
||||
#endif
|
||||
|
@@ -128,7 +128,7 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc)
|
||||
|
||||
while (--i > 3);
|
||||
|
||||
if (x & 240)
|
||||
if (x & 0xFFF0)
|
||||
cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1);
|
||||
}
|
||||
|
||||
|
@@ -8,15 +8,18 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "onyx_int.h"
|
||||
#include "threading.h"
|
||||
#include "common.h"
|
||||
#include "extend.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
|
||||
extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
|
||||
extern int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
|
||||
extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
TOKENEXTRA **t, int recon_yoffset,
|
||||
int recon_uvoffset);
|
||||
extern int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
TOKENEXTRA **t);
|
||||
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
extern void vp8_build_block_offsets(MACROBLOCK *x);
|
||||
extern void vp8_setup_block_ptrs(MACROBLOCK *x);
|
||||
@@ -24,12 +27,12 @@ extern void vp8_setup_block_ptrs(MACROBLOCK *x);
|
||||
static
|
||||
THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
|
||||
VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
|
||||
VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
|
||||
MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
|
||||
ENTROPY_CONTEXT_PLANES mb_row_left_context;
|
||||
|
||||
const int nsync = cpi->mt_sync_range;
|
||||
//printf("Started thread %d\n", ithread);
|
||||
|
||||
while (1)
|
||||
@@ -38,218 +41,213 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
break;
|
||||
|
||||
//if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0)
|
||||
if (sem_wait(&cpi->h_event_mbrencoding[ithread]) == 0)
|
||||
if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0)
|
||||
{
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
int mb_row;
|
||||
MACROBLOCK *x = &mbri->mb;
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
TOKENEXTRA *tp ;
|
||||
|
||||
int *segment_counts = mbri->segment_counts;
|
||||
int *totalrate = &mbri->totalrate;
|
||||
|
||||
if (cpi->b_multi_threaded == FALSE) // we're shutting down
|
||||
break;
|
||||
else
|
||||
|
||||
for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
|
||||
{
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
int mb_row = mbri->mb_row;
|
||||
MACROBLOCK *x = &mbri->mb;
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
TOKENEXTRA **tp = &mbri->tp;
|
||||
int *segment_counts = mbri->segment_counts;
|
||||
int *totalrate = &mbri->totalrate;
|
||||
|
||||
int i;
|
||||
int recon_yoffset, recon_uvoffset;
|
||||
int mb_col;
|
||||
int ref_fb_idx = cm->lst_fb_idx;
|
||||
int dst_fb_idx = cm->new_fb_idx;
|
||||
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
|
||||
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
|
||||
volatile int *last_row_current_mb_col;
|
||||
INT64 activity_sum = 0;
|
||||
|
||||
tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));
|
||||
|
||||
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
|
||||
|
||||
// reset above block coeffs
|
||||
xd->above_context = cm->above_context;
|
||||
xd->left_context = &mb_row_left_context;
|
||||
|
||||
vp8_zero(mb_row_left_context);
|
||||
|
||||
xd->up_available = (mb_row != 0);
|
||||
recon_yoffset = (mb_row * recon_y_stride * 16);
|
||||
recon_uvoffset = (mb_row * recon_uv_stride * 8);
|
||||
|
||||
cpi->tplist[mb_row].start = tp;
|
||||
|
||||
//printf("Thread mb_row = %d\n", mb_row);
|
||||
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int i;
|
||||
int recon_yoffset, recon_uvoffset;
|
||||
int mb_col;
|
||||
int ref_fb_idx = cm->lst_fb_idx;
|
||||
int dst_fb_idx = cm->new_fb_idx;
|
||||
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
|
||||
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
|
||||
volatile int *last_row_current_mb_col;
|
||||
INT64 activity_sum = 0;
|
||||
int seg_map_index = (mb_row * cm->mb_cols);
|
||||
|
||||
if (ithread > 0)
|
||||
last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
|
||||
else
|
||||
last_row_current_mb_col = &cpi->current_mb_col_main;
|
||||
|
||||
// reset above block coeffs
|
||||
xd->above_context = cm->above_context;
|
||||
xd->left_context = &mb_row_left_context;
|
||||
|
||||
vp8_zero(mb_row_left_context);
|
||||
|
||||
xd->up_available = (mb_row != 0);
|
||||
recon_yoffset = (mb_row * recon_y_stride * 16);
|
||||
recon_uvoffset = (mb_row * recon_uv_stride * 8);
|
||||
|
||||
|
||||
cpi->tplist[mb_row].start = *tp;
|
||||
|
||||
//printf("Thread mb_row = %d\n", mb_row);
|
||||
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
if ((mb_col & (nsync - 1)) == 0)
|
||||
{
|
||||
int seg_map_index = (mb_row * cm->mb_cols);
|
||||
|
||||
while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != cm->mb_cols - 1)
|
||||
while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1)
|
||||
{
|
||||
x86_pause_hint();
|
||||
thread_sleep(0);
|
||||
}
|
||||
|
||||
// Distance of Mb to the various image edges.
|
||||
// These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16) << 3);
|
||||
xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
|
||||
|
||||
// Set up limit values for motion vectors used to prevent them extending outside the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
|
||||
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
|
||||
xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
|
||||
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
// Is segmentation enabled
|
||||
// MB level adjutment to quantizer
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
// Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
|
||||
if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
|
||||
xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
|
||||
else
|
||||
xd->mode_info_context->mbmi.segment_id = 0;
|
||||
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
}
|
||||
else
|
||||
xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
|
||||
|
||||
x->active_ptr = cpi->active_map + seg_map_index + mb_col;
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
*totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
|
||||
#ifdef MODE_STATS
|
||||
y_modes[xd->mbmi.mode] ++;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
*totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
|
||||
|
||||
#ifdef MODE_STATS
|
||||
inter_y_modes[xd->mbmi.mode] ++;
|
||||
|
||||
if (xd->mbmi.mode == SPLITMV)
|
||||
{
|
||||
int b;
|
||||
|
||||
for (b = 0; b < xd->mbmi.partition_count; b++)
|
||||
{
|
||||
inter_b_modes[x->partition->bmi[b].mode] ++;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Count of last ref frame 0,0 useage
|
||||
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
cpi->inter_zz_count ++;
|
||||
|
||||
// Special case code for cyclic refresh
|
||||
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
|
||||
// during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
|
||||
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
|
||||
{
|
||||
cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
|
||||
|
||||
// If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
|
||||
// Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
|
||||
// else mark it as dirty (1).
|
||||
if (xd->mode_info_context->mbmi.segment_id)
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
|
||||
else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
{
|
||||
if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
|
||||
}
|
||||
else
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
|
||||
|
||||
}
|
||||
}
|
||||
cpi->tplist[mb_row].stop = *tp;
|
||||
|
||||
x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
|
||||
|
||||
// adjust to the next column of macroblocks
|
||||
x->src.y_buffer += 16;
|
||||
x->src.u_buffer += 8;
|
||||
x->src.v_buffer += 8;
|
||||
|
||||
recon_yoffset += 16;
|
||||
recon_uvoffset += 8;
|
||||
|
||||
// Keep track of segment useage
|
||||
segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
|
||||
|
||||
// skip to next mb
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
|
||||
xd->above_context++;
|
||||
|
||||
cpi->mb_row_ei[ithread].current_mb_col = mb_col;
|
||||
|
||||
}
|
||||
|
||||
//extend the recon for intra prediction
|
||||
vp8_extend_mb_row(
|
||||
&cm->yv12_fb[dst_fb_idx],
|
||||
xd->dst.y_buffer + 16,
|
||||
xd->dst.u_buffer + 8,
|
||||
xd->dst.v_buffer + 8);
|
||||
// Distance of Mb to the various image edges.
|
||||
// These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
|
||||
xd->mb_to_top_edge = -((mb_row * 16) << 3);
|
||||
xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
|
||||
|
||||
// this is to account for the border
|
||||
// Set up limit values for motion vectors used to prevent them extending outside the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
|
||||
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
|
||||
xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
|
||||
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
// Is segmentation enabled
|
||||
// MB level adjutment to quantizer
|
||||
if (xd->segmentation_enabled)
|
||||
{
|
||||
// Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
|
||||
if (cpi->segmentation_map[seg_map_index + mb_col] <= 3)
|
||||
xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index + mb_col];
|
||||
else
|
||||
xd->mode_info_context->mbmi.segment_id = 0;
|
||||
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
}
|
||||
else
|
||||
xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
|
||||
|
||||
x->active_ptr = cpi->active_map + seg_map_index + mb_col;
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
*totalrate += vp8cx_encode_intra_macro_block(cpi, x, &tp);
|
||||
#ifdef MODE_STATS
|
||||
y_modes[xd->mbmi.mode] ++;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
*totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset);
|
||||
|
||||
#ifdef MODE_STATS
|
||||
inter_y_modes[xd->mbmi.mode] ++;
|
||||
|
||||
if (xd->mbmi.mode == SPLITMV)
|
||||
{
|
||||
int b;
|
||||
|
||||
for (b = 0; b < xd->mbmi.partition_count; b++)
|
||||
{
|
||||
inter_b_modes[x->partition->bmi[b].mode] ++;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Count of last ref frame 0,0 useage
|
||||
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
cpi->inter_zz_count++;
|
||||
|
||||
// Special case code for cyclic refresh
|
||||
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
|
||||
// during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
|
||||
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
|
||||
{
|
||||
const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
|
||||
cpi->segmentation_map[seg_map_index + mb_col] = mbmi->segment_id;
|
||||
|
||||
// If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
|
||||
// Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
|
||||
// else mark it as dirty (1).
|
||||
if (mbmi->segment_id)
|
||||
cpi->cyclic_refresh_map[seg_map_index + mb_col] = -1;
|
||||
else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
|
||||
{
|
||||
if (cpi->cyclic_refresh_map[seg_map_index + mb_col] == 1)
|
||||
cpi->cyclic_refresh_map[seg_map_index + mb_col] = 0;
|
||||
}
|
||||
else
|
||||
cpi->cyclic_refresh_map[seg_map_index + mb_col] = 1;
|
||||
|
||||
}
|
||||
}
|
||||
cpi->tplist[mb_row].stop = tp;
|
||||
|
||||
x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
|
||||
|
||||
// adjust to the next column of macroblocks
|
||||
x->src.y_buffer += 16;
|
||||
x->src.u_buffer += 8;
|
||||
x->src.v_buffer += 8;
|
||||
|
||||
recon_yoffset += 16;
|
||||
recon_uvoffset += 8;
|
||||
|
||||
// Keep track of segment useage
|
||||
segment_counts[xd->mode_info_context->mbmi.segment_id]++;
|
||||
|
||||
// skip to next mb
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
|
||||
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
|
||||
xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
|
||||
if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1)
|
||||
{
|
||||
//SetEvent(cpi->h_event_main);
|
||||
sem_post(&cpi->h_event_main);
|
||||
}
|
||||
xd->above_context++;
|
||||
|
||||
cpi->mt_current_mb_col[mb_row] = mb_col;
|
||||
}
|
||||
|
||||
//extend the recon for intra prediction
|
||||
vp8_extend_mb_row(
|
||||
&cm->yv12_fb[dst_fb_idx],
|
||||
xd->dst.y_buffer + 16,
|
||||
xd->dst.u_buffer + 8,
|
||||
xd->dst.v_buffer + 8);
|
||||
|
||||
// this is to account for the border
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
|
||||
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
|
||||
xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
|
||||
|
||||
if (mb_row == cm->mb_rows - 1)
|
||||
{
|
||||
//SetEvent(cpi->h_event_main);
|
||||
sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
(void) p_data;
|
||||
#endif
|
||||
|
||||
//printf("exit thread %d\n", ithread);
|
||||
return 0;
|
||||
}
|
||||
@@ -363,7 +361,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
MACROBLOCK *x,
|
||||
MB_ROW_COMP *mbr_ei,
|
||||
@@ -414,7 +411,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1);
|
||||
mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1);
|
||||
|
||||
|
||||
vp8_build_block_offsets(mb);
|
||||
|
||||
vp8_setup_block_dptrs(mbd);
|
||||
@@ -431,17 +427,12 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
{
|
||||
cpi->b_multi_threaded = 0;
|
||||
|
||||
cpi->processor_core_count = 32; //vp8_get_proc_core_count();
|
||||
|
||||
CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
|
||||
if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
|
||||
{
|
||||
int ithread;
|
||||
@@ -451,14 +442,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
else
|
||||
cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1;
|
||||
|
||||
|
||||
CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->h_event_mbrencoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count));
|
||||
vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
|
||||
CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->mt_current_mb_col, vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cpi->common.mb_rows));
|
||||
|
||||
//cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
sem_init(&cpi->h_event_main, 0, 0);
|
||||
sem_init(&cpi->h_event_end_encoding, 0, 0);
|
||||
|
||||
cpi->b_multi_threaded = 1;
|
||||
|
||||
@@ -466,11 +458,13 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
|
||||
for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++)
|
||||
{
|
||||
ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread];
|
||||
|
||||
//cpi->h_event_mbrencoding[ithread] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
sem_init(&cpi->h_event_mbrencoding[ithread], 0, 0);
|
||||
cpi->en_thread_data[ithread].ithread = ithread;
|
||||
cpi->en_thread_data[ithread].ptr1 = (void *)cpi;
|
||||
cpi->en_thread_data[ithread].ptr2 = (void *)&cpi->mb_row_ei[ithread];
|
||||
sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
|
||||
ethd->ithread = ithread;
|
||||
ethd->ptr1 = (void *)cpi;
|
||||
ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];
|
||||
|
||||
//printf(" call begin thread %d \n", ithread);
|
||||
|
||||
@@ -482,19 +476,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
// 0,
|
||||
// NULL);
|
||||
|
||||
pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, (&cpi->en_thread_data[ithread]));
|
||||
|
||||
pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
|
||||
if (cpi->b_multi_threaded)
|
||||
{
|
||||
//shutdown other threads
|
||||
@@ -505,20 +495,21 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
//SetEvent(cpi->h_event_mbrencoding[i]);
|
||||
sem_post(&cpi->h_event_mbrencoding[i]);
|
||||
sem_post(&cpi->h_event_start_encoding[i]);
|
||||
pthread_join(cpi->h_encoding_thread[i], 0);
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
sem_destroy(&cpi->h_event_mbrencoding[i]);
|
||||
sem_destroy(&cpi->h_event_start_encoding[i]);
|
||||
}
|
||||
}
|
||||
|
||||
sem_destroy(&cpi->h_event_end_encoding);
|
||||
|
||||
//free thread related resources
|
||||
vpx_free(cpi->h_event_mbrencoding);
|
||||
vpx_free(cpi->h_event_start_encoding);
|
||||
vpx_free(cpi->h_encoding_thread);
|
||||
vpx_free(cpi->mb_row_ei);
|
||||
vpx_free(cpi->en_thread_data);
|
||||
vpx_free(cpi->mt_current_mb_col);
|
||||
}
|
||||
|
||||
#endif
|
||||
vpx_free(cpi->tplist);
|
||||
}
|
||||
#endif
|
||||
|
@@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "math.h"
|
||||
#include "limits.h"
|
||||
#include "block.h"
|
||||
@@ -178,40 +177,68 @@ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
return modified_err;
|
||||
}
|
||||
|
||||
static const double weight_table[256] = {
|
||||
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
|
||||
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
|
||||
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
|
||||
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
|
||||
0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750,
|
||||
0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750,
|
||||
0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750,
|
||||
0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000
|
||||
};
|
||||
|
||||
double vp8_simple_weight(YV12_BUFFER_CONFIG *source)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
unsigned char *src = source->y_buffer;
|
||||
unsigned char value;
|
||||
double sum_weights = 0.0;
|
||||
double Weight;
|
||||
|
||||
// Loop through the Y plane raw examining levels and creating a weight for the image
|
||||
for (i = 0; i < source->y_height; i++)
|
||||
i = source->y_height;
|
||||
do
|
||||
{
|
||||
for (j = 0; j < source->y_width; j++)
|
||||
j = source->y_width;
|
||||
do
|
||||
{
|
||||
value = src[j];
|
||||
|
||||
if (value >= 64)
|
||||
Weight = 1.0;
|
||||
else if (value > 32)
|
||||
Weight = (value - 32.0f) / 32.0f;
|
||||
else
|
||||
Weight = 0.02;
|
||||
|
||||
sum_weights += Weight;
|
||||
}
|
||||
|
||||
sum_weights += weight_table[ *src];
|
||||
src++;
|
||||
}while(--j);
|
||||
src -= source->y_width;
|
||||
src += source->y_stride;
|
||||
}
|
||||
}while(--i);
|
||||
|
||||
sum_weights /= (source->y_height * source->y_width);
|
||||
|
||||
return sum_weights;
|
||||
}
|
||||
|
||||
|
||||
// This function returns the current per frame maximum bitrate target
|
||||
int frame_max_bits(VP8_COMP *cpi)
|
||||
{
|
||||
@@ -440,7 +467,6 @@ void vp8_end_first_pass(VP8_COMP *cpi)
|
||||
vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
|
||||
}
|
||||
|
||||
|
||||
void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
|
||||
{
|
||||
MACROBLOCKD * const xd = & x->e_mbd;
|
||||
@@ -460,7 +486,6 @@ void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * r
|
||||
VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
|
||||
}
|
||||
|
||||
|
||||
void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset )
|
||||
{
|
||||
MACROBLOCKD *const xd = & x->e_mbd;
|
||||
@@ -548,7 +573,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
|
||||
int sum_in_vectors = 0;
|
||||
|
||||
MV best_ref_mv = {0, 0};
|
||||
MV zero_ref_mv = {0, 0};
|
||||
|
||||
unsigned char *fp_motion_map_ptr = cpi->fp_motion_map;
|
||||
@@ -586,13 +610,20 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
// for each macroblock row in image
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
MV best_ref_mv = {0, 0};
|
||||
int_mv best_ref_mv;
|
||||
|
||||
best_ref_mv.as_int = 0;
|
||||
|
||||
// reset above block coeffs
|
||||
xd->up_available = (mb_row != 0);
|
||||
recon_yoffset = (mb_row * recon_y_stride * 16);
|
||||
recon_uvoffset = (mb_row * recon_uv_stride * 8);
|
||||
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
|
||||
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
@@ -625,8 +656,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
|
||||
// Other than for the first frame do a motion search
|
||||
if (cm->current_video_frame > 0)
|
||||
@@ -647,12 +676,12 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
|
||||
// Test last reference frame using the previous best mv as the
|
||||
// starting point (best reference) for the search
|
||||
vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
|
||||
vp8_first_pass_motion_search(cpi, x, &best_ref_mv.as_mv,
|
||||
&d->bmi.mv.as_mv, lst_yv12,
|
||||
&motion_error, recon_yoffset);
|
||||
|
||||
// If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
|
||||
if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0))
|
||||
if (best_ref_mv.as_int)
|
||||
{
|
||||
tmp_err = INT_MAX;
|
||||
vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
|
||||
@@ -664,7 +693,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
d->bmi.mv.as_mv.row = tmp_mv.row;
|
||||
d->bmi.mv.as_mv.col = tmp_mv.col;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Experimental search in a second reference frame ((0,0) based only)
|
||||
@@ -693,6 +721,9 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
|
||||
}
|
||||
|
||||
/* Intra assumed best */
|
||||
best_ref_mv.as_int = 0;
|
||||
|
||||
if (motion_error <= this_error)
|
||||
{
|
||||
d->bmi.mv.as_mv.row <<= 3;
|
||||
@@ -708,13 +739,10 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
sum_mvcs += d->bmi.mv.as_mv.col * d->bmi.mv.as_mv.col;
|
||||
intercount++;
|
||||
|
||||
best_ref_mv.row = d->bmi.mv.as_mv.row;
|
||||
best_ref_mv.col = d->bmi.mv.as_mv.col;
|
||||
//best_ref_mv.row = 0;
|
||||
//best_ref_mv.col = 0;
|
||||
best_ref_mv.as_int = d->bmi.mv.as_int;
|
||||
|
||||
// Was the vector non-zero
|
||||
if (d->bmi.mv.as_mv.row || d->bmi.mv.as_mv.col)
|
||||
if (d->bmi.mv.as_int)
|
||||
{
|
||||
mvcount++;
|
||||
|
||||
@@ -770,12 +798,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
*fp_motion_map_ptr = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Intra was best
|
||||
best_ref_mv.row = 0;
|
||||
best_ref_mv.col = 0;
|
||||
}
|
||||
}
|
||||
|
||||
coded_error += this_error;
|
||||
@@ -813,6 +835,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
fps.coded_error = coded_error >> 8;
|
||||
weight = vp8_simple_weight(cpi->Source);
|
||||
|
||||
|
||||
if (weight < 0.1)
|
||||
weight = 0.1;
|
||||
|
||||
@@ -1316,6 +1339,43 @@ void vp8_end_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
}
|
||||
|
||||
// This function gives an estimate of how badly we believe
|
||||
// the prediction quality is decaying from frame to frame.
//
// The estimate is built only from the first-pass stats in next_frame:
// it starts as next_frame->pcnt_inter and is then lowered to the
// motion-percentage term and to the motion-distance term whenever either
// of those is smaller, so the smallest of the three values is returned.
// cpi is not read anywhere in this function.
// NOTE(review): the caller visible in define_gf_group multiplies the
// result into decay_accumulator, so a smaller return value appears to
// mean faster decay -- confirm against the other call sites.
|
||||
double gf_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
|
||||
{
|
||||
double prediction_decay_rate;
|
||||
double motion_decay;
|
||||
double motion_pct = next_frame->pcnt_motion;
|
||||
|
||||
|
||||
// Initial basis is the % mbs inter coded
|
||||
prediction_decay_rate = next_frame->pcnt_inter;
|
||||
|
||||
// High % motion -> somewhat higher decay rate
// (the term is 1.0 minus one twentieth of pcnt_motion, and it only
// replaces the running value when it is the smaller of the two)
|
||||
motion_decay = (1.0 - (motion_pct / 20.0));
|
||||
if (motion_decay < prediction_decay_rate)
|
||||
prediction_decay_rate = motion_decay;
|
||||
|
||||
// Adjustment to decay rate based on speed of motion
|
||||
{
|
||||
double this_mv_rabs;
|
||||
double this_mv_cabs;
|
||||
double distance_factor;
|
||||
|
||||
// Scale the mvr_abs / mvc_abs stats by pcnt_motion and take magnitudes.
// NOTE(review): presumably these are the absolute row / column motion
// vector stats from the first pass -- verify against FIRSTPASS_STATS.
this_mv_rabs = fabs(next_frame->mvr_abs * motion_pct);
|
||||
this_mv_cabs = fabs(next_frame->mvc_abs * motion_pct);
|
||||
|
||||
// Euclidean length of the (row, col) pair, divided by 250.0
distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
|
||||
(this_mv_cabs * this_mv_cabs)) / 250.0;
|
||||
// A scaled length above 1.0 maps to 0.0; otherwise use 1.0 minus it
distance_factor = ((distance_factor > 1.0)
|
||||
? 0.0 : (1.0 - distance_factor));
|
||||
if (distance_factor < prediction_decay_rate)
|
||||
prediction_decay_rate = distance_factor;
|
||||
}
|
||||
|
||||
return prediction_decay_rate;
|
||||
}
|
||||
|
||||
// Analyse and define a gf/arf group .
|
||||
static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
{
|
||||
@@ -1337,17 +1397,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
double decay_accumulator = 1.0;
|
||||
|
||||
double boost_factor = IIFACTOR;
|
||||
double loop_decay_rate = 1.00; // Starting decay rate
|
||||
double loop_decay_rate = 1.00; // Starting decay rate
|
||||
|
||||
double this_frame_mv_in_out = 0.0;
|
||||
double mv_in_out_accumulator = 0.0;
|
||||
double abs_mv_in_out_accumulator = 0.0;
|
||||
double mod_err_per_mb_accumulator = 0.0;
|
||||
|
||||
int max_bits = frame_max_bits(cpi); // Max for a single frame
|
||||
int max_bits = frame_max_bits(cpi); // Max for a single frame
|
||||
|
||||
unsigned char *fpmm_pos;
|
||||
|
||||
unsigned int allow_alt_ref =
|
||||
cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
|
||||
|
||||
cpi->gf_group_bits = 0;
|
||||
cpi->gf_decay_rate = 0;
|
||||
|
||||
@@ -1362,47 +1425,57 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// Preload the stats for the next frame.
|
||||
mod_frame_err = calculate_modified_err(cpi, this_frame);
|
||||
|
||||
// Note the error of the frame at the start of the group (this will be the GF frame error if we code a normal gf
|
||||
// Note the error of the frame at the start of the group (this will be
|
||||
// the GF frame error if we code a normal gf
|
||||
gf_first_frame_err = mod_frame_err;
|
||||
|
||||
// Special treatment if the current frame is a key frame (which is also a gf).
|
||||
// If it is then its error score (and hence bit allocation) need to be subtracted out
|
||||
// from the calculation for the GF group
|
||||
// Special treatment if the current frame is a key frame (which is also
|
||||
// a gf). If it is then its error score (and hence bit allocation) need
|
||||
// to be subtracted out from the calculation for the GF group
|
||||
if (cpi->common.frame_type == KEY_FRAME)
|
||||
gf_group_err -= gf_first_frame_err;
|
||||
|
||||
// Scan forward to try and work out how many frames the next gf group should contain and
|
||||
// what level of boost is appropriate for the GF or ARF that will be coded with the group
|
||||
// Scan forward to try and work out how many frames the next gf group
|
||||
// should contain and what level of boost is appropriate for the GF
|
||||
// or ARF that will be coded with the group
|
||||
i = 0;
|
||||
|
||||
while (((i < cpi->static_scene_max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
|
||||
while (((i < cpi->static_scene_max_gf_interval) ||
|
||||
((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) &&
|
||||
(i < cpi->frames_to_key))
|
||||
{
|
||||
double r;
|
||||
double this_frame_mvr_ratio;
|
||||
double this_frame_mvc_ratio;
|
||||
double motion_decay;
|
||||
double motion_pct = next_frame.pcnt_motion;
|
||||
//double motion_pct = next_frame.pcnt_motion;
|
||||
double motion_pct;
|
||||
|
||||
i++; // Increment the loop counter
|
||||
i++; // Increment the loop counter
|
||||
|
||||
// Accumulate error score of frames in this gf group
|
||||
mod_frame_err = calculate_modified_err(cpi, this_frame);
|
||||
|
||||
gf_group_err += mod_frame_err;
|
||||
|
||||
mod_err_per_mb_accumulator += mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs);
|
||||
mod_err_per_mb_accumulator +=
|
||||
mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs);
|
||||
|
||||
if (EOF == vp8_input_stats(cpi, &next_frame))
|
||||
break;
|
||||
|
||||
// Accumulate motion stats.
|
||||
motion_pct = next_frame.pcnt_motion;
|
||||
mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct);
|
||||
mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct);
|
||||
|
||||
//Accumulate Motion In/Out of frame stats
|
||||
this_frame_mv_in_out = next_frame.mv_in_out_count * next_frame.pcnt_motion;
|
||||
mv_in_out_accumulator += next_frame.mv_in_out_count * next_frame.pcnt_motion;
|
||||
abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
|
||||
this_frame_mv_in_out =
|
||||
next_frame.mv_in_out_count * motion_pct;
|
||||
mv_in_out_accumulator +=
|
||||
next_frame.mv_in_out_count * motion_pct;
|
||||
abs_mv_in_out_accumulator +=
|
||||
fabs(next_frame.mv_in_out_count * motion_pct);
|
||||
|
||||
// If there is a significant amount of motion
|
||||
if (motion_pct > 0.05)
|
||||
@@ -1431,7 +1504,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
}
|
||||
|
||||
// Underlying boost factor is based on inter intra error ratio
|
||||
r = (boost_factor * (next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)));
|
||||
r = ( boost_factor *
|
||||
( next_frame.intra_error /
|
||||
DOUBLE_DIVIDE_CHECK(next_frame.coded_error)));
|
||||
|
||||
if (next_frame.intra_error > cpi->gf_intra_err_min)
|
||||
r = (IIKFACTOR2 * next_frame.intra_error /
|
||||
@@ -1440,54 +1515,76 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
r = (IIKFACTOR2 * cpi->gf_intra_err_min /
|
||||
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
|
||||
|
||||
// Increase boost for frames where new data coming into frame (eg zoom out)
|
||||
// Slightly reduce boost if there is a net balance of motion out of the frame (zoom in)
|
||||
// Increase boost for frames where new data coming into frame
|
||||
// (eg zoom out). Slightly reduce boost if there is a net balance
|
||||
// of motion out of the frame (zoom in).
|
||||
// The range for this_frame_mv_in_out is -1.0 to +1.0
|
||||
if (this_frame_mv_in_out > 0.0)
|
||||
r += r * (this_frame_mv_in_out * 2.0);
|
||||
// In extreme case boost is halved
|
||||
else
|
||||
r += r * (this_frame_mv_in_out / 2.0); // In extreme case boost is halved
|
||||
r += r * (this_frame_mv_in_out / 2.0);
|
||||
|
||||
if (r > GF_RMAX)
|
||||
r = GF_RMAX;
|
||||
|
||||
// Adjust loop decay rate
|
||||
//if ( next_frame.pcnt_inter < loop_decay_rate )
|
||||
loop_decay_rate = next_frame.pcnt_inter;
|
||||
|
||||
// High % motion -> somewhat higher decay rate
|
||||
motion_decay = (1.0 - (motion_pct / 20.0));
|
||||
if (motion_decay < loop_decay_rate)
|
||||
loop_decay_rate = motion_decay;
|
||||
|
||||
// Adjustment to decay rate based on speed of motion
|
||||
{
|
||||
double this_mv_rabs;
|
||||
double this_mv_cabs;
|
||||
double distance_factor;
|
||||
|
||||
this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
|
||||
this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
|
||||
|
||||
distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
|
||||
(this_mv_cabs * this_mv_cabs)) / 250.0;
|
||||
distance_factor = ((distance_factor > 1.0)
|
||||
? 0.0 : (1.0 - distance_factor));
|
||||
if (distance_factor < loop_decay_rate)
|
||||
loop_decay_rate = distance_factor;
|
||||
}
|
||||
loop_decay_rate = gf_prediction_decay_rate(cpi, &next_frame);
|
||||
|
||||
// Cumulative effect of decay
|
||||
decay_accumulator = decay_accumulator * loop_decay_rate;
|
||||
decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
|
||||
//decay_accumulator = ( loop_decay_rate < decay_accumulator ) ? loop_decay_rate : decay_accumulator;
|
||||
|
||||
boost_score += (decay_accumulator * r);
|
||||
|
||||
// Break clause to detect very still sections after motion
|
||||
// For example a static image after a fade or other transition
|
||||
// instead of a clean key frame.
|
||||
if ( (i > MIN_GF_INTERVAL) &&
|
||||
(loop_decay_rate >= 0.999) &&
|
||||
(decay_accumulator < 0.9) )
|
||||
{
|
||||
int j;
|
||||
FIRSTPASS_STATS * position = cpi->stats_in;
|
||||
FIRSTPASS_STATS tmp_next_frame;
|
||||
double decay_rate;
|
||||
|
||||
// Look ahead a few frames to see if static condition
|
||||
// persists...
|
||||
for ( j = 0; j < 4; j++ )
|
||||
{
|
||||
if (EOF == vp8_input_stats(cpi, &tmp_next_frame))
|
||||
break;
|
||||
|
||||
decay_rate = gf_prediction_decay_rate(cpi, &tmp_next_frame);
|
||||
if ( decay_rate < 0.999 )
|
||||
break;
|
||||
}
|
||||
reset_fpf_position(cpi, position); // Reset file position
|
||||
|
||||
// Force GF not alt ref
|
||||
if ( j == 4 )
|
||||
{
|
||||
if (0)
|
||||
{
|
||||
FILE *f = fopen("fadegf.stt", "a");
|
||||
fprintf(f, " %8d %8d %10.4f %10.4f %10.4f\n",
|
||||
cpi->common.current_video_frame+i, i,
|
||||
loop_decay_rate, decay_accumulator,
|
||||
boost_score );
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
allow_alt_ref = FALSE;
|
||||
|
||||
boost_score = old_boost_score;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Break out conditions.
|
||||
if ( /* i>4 || */
|
||||
// Break at cpi->max_gf_interval unless almost totally static
|
||||
(i >= cpi->max_gf_interval && (loop_decay_rate < 0.99)) ||
|
||||
(i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) ||
|
||||
(
|
||||
// Dont break out with a very short interval
|
||||
(i > MIN_GF_INTERVAL) &&
|
||||
@@ -1509,7 +1606,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
old_boost_score = boost_score;
|
||||
}
|
||||
|
||||
cpi->gf_decay_rate = (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0;
|
||||
cpi->gf_decay_rate =
|
||||
(i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0;
|
||||
|
||||
// When using CBR apply additional buffer related upper limits
|
||||
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
|
||||
@@ -1519,7 +1617,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// For cbr apply buffer related limits
|
||||
if (cpi->drop_frames_allowed)
|
||||
{
|
||||
int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100);
|
||||
int df_buffer_level = cpi->oxcf.drop_frames_water_mark *
|
||||
(cpi->oxcf.optimal_buffer_level / 100);
|
||||
|
||||
if (cpi->buffer_level > df_buffer_level)
|
||||
max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth);
|
||||
@@ -1542,10 +1641,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
cpi->gfu_boost = (int)(boost_score * 100.0) >> 4;
|
||||
|
||||
// Should we use the alternate reference frame
|
||||
if (cpi->oxcf.play_alternate &&
|
||||
cpi->oxcf.lag_in_frames &&
|
||||
if (allow_alt_ref &&
|
||||
(i >= MIN_GF_INTERVAL) &&
|
||||
(i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) && // dont use ARF very near next kf
|
||||
// dont use ARF very near next kf
|
||||
(i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) &&
|
||||
(((next_frame.pcnt_inter > 0.75) &&
|
||||
((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) &&
|
||||
//(cpi->gfu_boost>150) &&
|
||||
@@ -2347,12 +2446,35 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
if (cpi->oxcf.auto_key
|
||||
&& cpi->frames_to_key > (int)cpi->key_frame_frequency )
|
||||
{
|
||||
FIRSTPASS_STATS *current_pos = cpi->stats_in;
|
||||
FIRSTPASS_STATS tmp_frame;
|
||||
|
||||
cpi->frames_to_key /= 2;
|
||||
|
||||
// Estimate corrected kf group error
|
||||
kf_group_err /= 2.0;
|
||||
kf_group_intra_err /= 2.0;
|
||||
kf_group_coded_err /= 2.0;
|
||||
// Copy first frame details
|
||||
vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame));
|
||||
|
||||
// Reset to the start of the group
|
||||
reset_fpf_position(cpi, start_position);
|
||||
|
||||
kf_group_err = 0;
|
||||
kf_group_intra_err = 0;
|
||||
kf_group_coded_err = 0;
|
||||
|
||||
// Rescan to get the correct error data for the forced kf group
|
||||
for( i = 0; i < cpi->frames_to_key; i++ )
|
||||
{
|
||||
// Accumulate kf group errors
|
||||
kf_group_err += calculate_modified_err(cpi, &tmp_frame);
|
||||
kf_group_intra_err += tmp_frame.intra_error;
|
||||
kf_group_coded_err += tmp_frame.coded_error;
|
||||
|
||||
// Load the next frame's stats
|
||||
vp8_input_stats(cpi, &tmp_frame);
|
||||
}
|
||||
|
||||
// Reset to the start of the group
|
||||
reset_fpf_position(cpi, current_pos);
|
||||
|
||||
cpi->next_key_frame_forced = TRUE;
|
||||
}
|
||||
@@ -2451,7 +2573,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
{
|
||||
double r;
|
||||
double motion_decay;
|
||||
double motion_pct = next_frame.pcnt_motion;
|
||||
double motion_pct;
|
||||
|
||||
if (EOF == vp8_input_stats(cpi, &next_frame))
|
||||
break;
|
||||
@@ -2471,6 +2593,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
loop_decay_rate = next_frame.pcnt_inter;
|
||||
|
||||
// High % motion -> somewhat higher decay rate
|
||||
motion_pct = next_frame.pcnt_motion;
|
||||
motion_decay = (1.0 - (motion_pct / 20.0));
|
||||
if (motion_decay < loop_decay_rate)
|
||||
loop_decay_rate = motion_decay;
|
||||
|
@@ -779,15 +779,17 @@ int vp8_hex_search
|
||||
int *num00,
|
||||
const vp8_variance_fn_ptr_t *vfp,
|
||||
int *mvsadcost[2],
|
||||
int *mvcost[2]
|
||||
int *mvcost[2],
|
||||
MV *center_mv
|
||||
)
|
||||
{
|
||||
MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
|
||||
MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
|
||||
MV neighbors[8] = { { -1, -1}, {0, -1}, {1, -1}, { -1, 0}, {1, 0}, { -1, 1}, {0, 1}, {1, 1} } ;
|
||||
int i, j;
|
||||
unsigned char *src = (*(b->base_src) + b->src);
|
||||
int src_stride = b->src_stride;
|
||||
int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
|
||||
int rr = center_mv->row, rc = center_mv->col;
|
||||
int br = ref_mv->row >> 3, bc = ref_mv->col >> 3, tr, tc;
|
||||
unsigned int besterr, thiserr = 0x7fffffff;
|
||||
int k = -1, tk;
|
||||
|
||||
@@ -892,7 +894,7 @@ cal_neighbors:
|
||||
best_mv->row = br;
|
||||
best_mv->col = bc;
|
||||
|
||||
return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
|
||||
return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + vp8_mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
|
||||
}
|
||||
#undef MVC
|
||||
#undef PRE
|
||||
|
@@ -43,8 +43,8 @@ extern int vp8_hex_search
|
||||
int *num00,
|
||||
const vp8_variance_fn_ptr_t *vf,
|
||||
int *mvsadcost[2],
|
||||
int *mvcost[2]
|
||||
|
||||
int *mvcost[2],
|
||||
MV *center_mv
|
||||
);
|
||||
|
||||
typedef int (fractional_mv_step_fp)
|
||||
|
@@ -262,6 +262,10 @@ static void setup_features(VP8_COMP *cpi)
|
||||
|
||||
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
|
||||
{
|
||||
if(cpi->tplist!=0)
|
||||
vpx_free(cpi->tplist);
|
||||
cpi->tplist = NULL;
|
||||
|
||||
// Delete last frame MV storage buffers
|
||||
if (cpi->lfmv != 0)
|
||||
vpx_free(cpi->lfmv);
|
||||
@@ -598,6 +602,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
|
||||
sf->first_step = 0;
|
||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||
sf->improved_mv_pred = 1;
|
||||
|
||||
cpi->do_full[0] = 0;
|
||||
cpi->do_full[1] = 0;
|
||||
@@ -640,34 +645,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
|
||||
sf->first_step = 0;
|
||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
|
||||
}
|
||||
|
||||
break;
|
||||
case 1:
|
||||
case 3:
|
||||
@@ -725,41 +702,22 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->full_freq[0] = 15;
|
||||
sf->full_freq[1] = 31;
|
||||
|
||||
sf->first_step = 0;
|
||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (Speed > 0)
|
||||
{
|
||||
// Disable coefficient optimization above speed 0
|
||||
/* Disable coefficient optimization above speed 0 */
|
||||
sf->optimize_coefficients = 0;
|
||||
sf->use_fastquant_for_pick = 1;
|
||||
sf->no_skip_block4x4_search = 0;
|
||||
|
||||
sf->first_step = 1;
|
||||
|
||||
cpi->mode_check_freq[THR_SPLITG] = 2;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 2;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 0;
|
||||
}
|
||||
|
||||
if (Speed > 1)
|
||||
{
|
||||
cpi->mode_check_freq[THR_SPLITG] = 4;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 4;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 2;
|
||||
@@ -792,18 +750,10 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_NEWA ] = 2000;
|
||||
sf->thresh_mult[THR_SPLITA ] = 20000;
|
||||
}
|
||||
|
||||
sf->use_fastquant_for_pick = 1;
|
||||
|
||||
sf->first_step = 1;
|
||||
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
|
||||
sf->no_skip_block4x4_search = 0;
|
||||
}
|
||||
|
||||
if (Speed > 1)
|
||||
if (Speed > 2)
|
||||
{
|
||||
sf->use_fastquant_for_pick = 0;
|
||||
|
||||
cpi->mode_check_freq[THR_SPLITG] = 15;
|
||||
cpi->mode_check_freq[THR_SPLITA] = 15;
|
||||
cpi->mode_check_freq[THR_SPLITMV] = 7;
|
||||
@@ -837,8 +787,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_SPLITA ] = 50000;
|
||||
}
|
||||
|
||||
sf->first_step = 1;
|
||||
|
||||
sf->improved_quant = 0;
|
||||
sf->improved_dct = 0;
|
||||
|
||||
@@ -848,38 +796,14 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
|
||||
sf->full_freq[0] = 31;
|
||||
sf->full_freq[1] = 63;
|
||||
|
||||
}
|
||||
|
||||
if (Speed > 2)
|
||||
{
|
||||
sf->auto_filter = 0; // Faster selection of loop filter
|
||||
cpi->mode_check_freq[THR_V_PRED] = 2;
|
||||
cpi->mode_check_freq[THR_H_PRED] = 2;
|
||||
cpi->mode_check_freq[THR_B_PRED] = 2;
|
||||
|
||||
if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
|
||||
{
|
||||
cpi->mode_check_freq[THR_NEARG] = 2;
|
||||
cpi->mode_check_freq[THR_NEWG] = 4;
|
||||
}
|
||||
|
||||
if (cpi->ref_frame_flags & VP8_ALT_FLAG)
|
||||
{
|
||||
cpi->mode_check_freq[THR_NEARA] = 2;
|
||||
cpi->mode_check_freq[THR_NEWA] = 4;
|
||||
}
|
||||
|
||||
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
|
||||
|
||||
sf->full_freq[0] = 63;
|
||||
sf->full_freq[1] = 127;
|
||||
}
|
||||
|
||||
if (Speed > 3)
|
||||
{
|
||||
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
|
||||
|
||||
cpi->mode_check_freq[THR_V_PRED] = 0;
|
||||
cpi->mode_check_freq[THR_H_PRED] = 0;
|
||||
cpi->mode_check_freq[THR_B_PRED] = 0;
|
||||
@@ -891,13 +815,16 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->auto_filter = 1;
|
||||
sf->recode_loop = 0; // recode loop off
|
||||
sf->RD = 0; // Turn rd off
|
||||
sf->full_freq[0] = INT_MAX;
|
||||
sf->full_freq[1] = INT_MAX;
|
||||
|
||||
sf->full_freq[0] = 63;
|
||||
sf->full_freq[1] = 127;
|
||||
}
|
||||
|
||||
if (Speed > 4)
|
||||
{
|
||||
sf->auto_filter = 0; // Faster selection of loop filter
|
||||
sf->full_freq[0] = INT_MAX;
|
||||
sf->full_freq[1] = INT_MAX;
|
||||
|
||||
cpi->mode_check_freq[THR_V_PRED] = 2;
|
||||
cpi->mode_check_freq[THR_H_PRED] = 2;
|
||||
@@ -963,33 +890,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->full_freq[1] = 31;
|
||||
sf->search_method = NSTEP;
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (Speed > 0)
|
||||
{
|
||||
cpi->mode_check_freq[THR_SPLITG] = 4;
|
||||
@@ -1118,6 +1018,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
#else
|
||||
sf->search_method = DIAMOND;
|
||||
#endif
|
||||
sf->iterative_sub_pixel = 0;
|
||||
|
||||
cpi->mode_check_freq[THR_V_PRED] = 4;
|
||||
cpi->mode_check_freq[THR_H_PRED] = 4;
|
||||
@@ -1169,7 +1070,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
int total_skip;
|
||||
|
||||
int min = 2000;
|
||||
sf->iterative_sub_pixel = 0;
|
||||
|
||||
if (cpi->oxcf.encode_breakout > 2000)
|
||||
min = cpi->oxcf.encode_breakout;
|
||||
@@ -1225,6 +1125,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
sf->thresh_mult[THR_V_PRED] = INT_MAX;
|
||||
sf->thresh_mult[THR_H_PRED] = INT_MAX;
|
||||
|
||||
sf->improved_mv_pred = 0;
|
||||
}
|
||||
|
||||
if (Speed > 8)
|
||||
@@ -1270,7 +1171,36 @@ void vp8_set_speed_features(VP8_COMP *cpi)
|
||||
|
||||
vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins));
|
||||
|
||||
};
|
||||
}; /* switch */
|
||||
|
||||
/* disable frame modes if flags not set */
|
||||
if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWG ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITG ] = INT_MAX;
|
||||
}
|
||||
|
||||
if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
|
||||
{
|
||||
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEARA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_NEWA ] = INT_MAX;
|
||||
sf->thresh_mult[THR_SPLITA ] = INT_MAX;
|
||||
}
|
||||
|
||||
|
||||
// Slow quant, dct and trellis not worthwhile for first pass
|
||||
// so make sure they are always turned off.
|
||||
@@ -1465,6 +1395,22 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
|
||||
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate firstpass stats");
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (width < 640)
|
||||
cpi->mt_sync_range = 1;
|
||||
else if (width <= 1280)
|
||||
cpi->mt_sync_range = 4;
|
||||
else if (width <= 2560)
|
||||
cpi->mt_sync_range = 8;
|
||||
else
|
||||
cpi->mt_sync_range = 16;
|
||||
#endif
|
||||
|
||||
if(cpi->tplist);
|
||||
vpx_free(cpi->tplist);
|
||||
|
||||
CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
|
||||
}
|
||||
|
||||
|
||||
@@ -2187,7 +2133,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
|
||||
cpi->common.error.setjmp = 1;
|
||||
|
||||
CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
|
||||
CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
|
||||
|
||||
vp8_create_common(&cpi->common);
|
||||
@@ -2224,9 +2169,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
cpi->gold_is_alt = 0 ;
|
||||
|
||||
// allocate memory for storing last frame's MVs for MV prediction.
|
||||
CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int_mv)));
|
||||
CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
|
||||
CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+1) * (cpi->common.mb_cols+1), sizeof(int)));
|
||||
CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv)));
|
||||
CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
|
||||
CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int)));
|
||||
|
||||
// Create the encoder segmentation map and set all entries to 0
|
||||
CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
|
||||
@@ -2417,7 +2362,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
init_mv_ref_counts();
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
vp8cx_create_encoder_threads(cpi);
|
||||
#endif
|
||||
|
||||
cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
|
||||
cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
|
||||
@@ -2692,12 +2639,13 @@ void vp8_remove_compressor(VP8_PTR *ptr)
|
||||
|
||||
}
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
vp8cx_remove_encoder_threads(cpi);
|
||||
#endif
|
||||
|
||||
vp8_dealloc_compressor_data(cpi);
|
||||
vpx_free(cpi->mb.ss);
|
||||
vpx_free(cpi->tok);
|
||||
vpx_free(cpi->rdtok);
|
||||
vpx_free(cpi->cyclic_refresh_map);
|
||||
|
||||
vp8_remove_common(&cpi->common);
|
||||
@@ -3114,11 +3062,14 @@ static int pick_frame_size(VP8_COMP *cpi)
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void set_quantizer(VP8_COMP *cpi, int Q)
|
||||
{
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
|
||||
int update = 0;
|
||||
|
||||
update |= cm->base_qindex != Q;
|
||||
cm->base_qindex = Q;
|
||||
|
||||
cm->y1dc_delta_q = 0;
|
||||
@@ -3127,11 +3078,21 @@ static void set_quantizer(VP8_COMP *cpi, int Q)
|
||||
cm->uvdc_delta_q = 0;
|
||||
cm->uvac_delta_q = 0;
|
||||
|
||||
if(Q<4)
|
||||
{
|
||||
update |= cm->y2dc_delta_q != 4-Q;
|
||||
cm->y2dc_delta_q = 4-Q;
|
||||
}
|
||||
|
||||
// Set segment-specific quantizers
|
||||
mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0];
|
||||
mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1];
|
||||
mbd->segment_feature_data[MB_LVL_ALT_Q][2] = cpi->segment_feature_data[MB_LVL_ALT_Q][2];
|
||||
mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3];
|
||||
|
||||
if(update)
|
||||
vp8cx_init_quantizer(cpi);
|
||||
|
||||
}
|
||||
|
||||
static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
|
||||
@@ -3601,6 +3562,17 @@ static void encode_frame_to_data_rate
|
||||
// Test code for segmentation of gf/arf (0,0)
|
||||
//segmentation_test_function((VP8_PTR) cpi);
|
||||
|
||||
#if CONFIG_REALTIME_ONLY
|
||||
if(cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
|
||||
{
|
||||
if(cpi->force_next_frame_intra)
|
||||
{
|
||||
cm->frame_type = KEY_FRAME; /* delayed intra frame */
|
||||
}
|
||||
}
|
||||
cpi->force_next_frame_intra = 0;
|
||||
#endif
|
||||
|
||||
// For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
|
||||
@@ -3853,7 +3825,7 @@ static void encode_frame_to_data_rate
|
||||
// One pass more conservative
|
||||
else
|
||||
cpi->active_best_quality = kf_high_motion_minq[Q];
|
||||
}
|
||||
}
|
||||
|
||||
else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
|
||||
{
|
||||
@@ -4111,6 +4083,14 @@ static void encode_frame_to_data_rate
|
||||
// (assuming that we didn't)!
|
||||
if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME)
|
||||
{
|
||||
|
||||
#if CONFIG_REALTIME_ONLY
|
||||
{
|
||||
/* we don't do re-encoding in realtime mode
|
||||
* if a key frame is decided on, then we force it on the next frame */
|
||||
cpi->force_next_frame_intra = decide_key_frame(cpi);
|
||||
}
|
||||
#else
|
||||
if (decide_key_frame(cpi))
|
||||
{
|
||||
vp8_calc_auto_iframe_target_size(cpi);
|
||||
@@ -4149,6 +4129,7 @@ static void encode_frame_to_data_rate
|
||||
resize_key_frame(cpi);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
vp8_clear_system_state();
|
||||
@@ -4188,7 +4169,7 @@ static void encode_frame_to_data_rate
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
// The key frame is not good enough
|
||||
if ( kf_err > ((cpi->ambient_err * 3) >> 2) )
|
||||
if ( kf_err > ((cpi->ambient_err * 7) >> 3) )
|
||||
{
|
||||
// Lower q_high
|
||||
q_high = (Q > q_low) ? (Q - 1) : q_low;
|
||||
@@ -4386,32 +4367,33 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
|
||||
// This frame's MVs are saved and will be used in next frame's MV prediction.
|
||||
// Last frame has one more line(add to bottom) and one more column(add to right) than cm->mip. The edge elements are initialized to 0.
|
||||
if(cm->show_frame) //do not save for altref frame
|
||||
{
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
|
||||
//static int last_video_frame = 0;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays.
|
||||
|
||||
if(cm->frame_type != KEY_FRAME)
|
||||
{
|
||||
for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
|
||||
if(cm->frame_type != KEY_FRAME)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
|
||||
{
|
||||
if(tmp->mbmi.ref_frame != INTRA_FRAME)
|
||||
cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride)].as_int = tmp->mbmi.mv.as_int;
|
||||
for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++)
|
||||
{
|
||||
if(tmp->mbmi.ref_frame != INTRA_FRAME)
|
||||
cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride+1)].as_int = tmp->mbmi.mv.as_int;
|
||||
|
||||
cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
|
||||
cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride)] = tmp->mbmi.ref_frame;
|
||||
tmp++;
|
||||
}
|
||||
cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride+1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame];
|
||||
cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride+1)] = tmp->mbmi.ref_frame;
|
||||
tmp++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the GF usage maps.
|
||||
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
|
||||
// This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
|
||||
vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
@@ -4614,7 +4596,8 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
|
||||
// Update the buffer level variable.
|
||||
if (cpi->common.refresh_alt_ref_frame)
|
||||
// Non-viewable frames are a special case and are treated as pure overhead.
|
||||
if ( !cm->show_frame )
|
||||
cpi->bits_off_target -= cpi->projected_frame_size;
|
||||
else
|
||||
cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
|
||||
|
@@ -28,6 +28,7 @@
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "mcomp.h"
|
||||
#include "temporal_filter.h"
|
||||
#include "findnearmv.h"
|
||||
|
||||
//#define SPEEDSTATS 1
|
||||
#define MIN_GF_INTERVAL 4
|
||||
@@ -184,17 +185,15 @@ typedef struct
|
||||
|
||||
int use_fastquant_for_pick;
|
||||
int no_skip_block4x4_search;
|
||||
int improved_mv_pred;
|
||||
|
||||
} SPEED_FEATURES;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
MACROBLOCK mb;
|
||||
int mb_row;
|
||||
TOKENEXTRA *tp;
|
||||
int segment_counts[MAX_MB_SEGMENTS];
|
||||
int totalrate;
|
||||
int current_mb_col;
|
||||
} MB_ROW_COMP;
|
||||
|
||||
typedef struct
|
||||
@@ -245,12 +244,6 @@ enum
|
||||
BLOCK_MAX_SEGMENTS
|
||||
};
|
||||
|
||||
typedef union
|
||||
{
|
||||
unsigned int as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
@@ -309,8 +302,6 @@ typedef struct
|
||||
|
||||
YV12_BUFFER_CONFIG last_frame_uf;
|
||||
|
||||
char *Dest;
|
||||
|
||||
TOKENEXTRA *tok;
|
||||
unsigned int tok_count;
|
||||
|
||||
@@ -343,11 +334,6 @@ typedef struct
|
||||
int RDMULT;
|
||||
int RDDIV ;
|
||||
|
||||
TOKENEXTRA *rdtok;
|
||||
vp8_writer rdbc;
|
||||
int intra_mode_costs[10];
|
||||
|
||||
|
||||
CODING_CONTEXT coding_context;
|
||||
|
||||
// Rate targeting variables
|
||||
@@ -355,7 +341,6 @@ typedef struct
|
||||
long long last_prediction_error;
|
||||
long long intra_error;
|
||||
long long last_intra_error;
|
||||
long long last_auto_filter_prediction_error;
|
||||
|
||||
#if 0
|
||||
// Experimental RD code
|
||||
@@ -560,8 +545,6 @@ typedef struct
|
||||
|
||||
int ref_frame_flags;
|
||||
|
||||
int exp[512];
|
||||
|
||||
SPEED_FEATURES sf;
|
||||
int error_bins[1024];
|
||||
|
||||
@@ -607,22 +590,21 @@ typedef struct
|
||||
int cyclic_refresh_q;
|
||||
signed char *cyclic_refresh_map;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
// multithread data
|
||||
int current_mb_col_main;
|
||||
int * mt_current_mb_col;
|
||||
int mt_sync_range;
|
||||
int processor_core_count;
|
||||
int b_multi_threaded;
|
||||
int encoding_thread_count;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
pthread_t *h_encoding_thread;
|
||||
#endif
|
||||
MB_ROW_COMP *mb_row_ei;
|
||||
ENCODETHREAD_DATA *en_thread_data;
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
//events
|
||||
sem_t *h_event_mbrencoding;
|
||||
sem_t h_event_main;
|
||||
sem_t *h_event_start_encoding;
|
||||
sem_t h_event_end_encoding;
|
||||
#endif
|
||||
|
||||
TOKENLIST *tplist;
|
||||
@@ -694,6 +676,9 @@ typedef struct
|
||||
int *lf_ref_frame_sign_bias;
|
||||
int *lf_ref_frame;
|
||||
|
||||
#if CONFIG_REALTIME_ONLY
|
||||
int force_next_frame_intra; /* force next frame to intra when kf_auto says so */
|
||||
#endif
|
||||
} VP8_COMP;
|
||||
|
||||
void control_data_rate(VP8_COMP *cpi);
|
||||
|
@@ -24,7 +24,7 @@
|
||||
#include "g_common.h"
|
||||
#include "variance.h"
|
||||
#include "mcomp.h"
|
||||
|
||||
#include "rdopt.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -168,8 +168,6 @@ static int pick_intra4x4block(
|
||||
B_PREDICTION_MODE *best_mode,
|
||||
B_PREDICTION_MODE above,
|
||||
B_PREDICTION_MODE left,
|
||||
ENTROPY_CONTEXT *a,
|
||||
ENTROPY_CONTEXT *l,
|
||||
|
||||
int *bestrate,
|
||||
int *bestdistortion)
|
||||
@@ -179,8 +177,6 @@ static int pick_intra4x4block(
|
||||
int rate;
|
||||
int distortion;
|
||||
unsigned int *mode_costs;
|
||||
(void) l;
|
||||
(void) a;
|
||||
|
||||
if (x->e_mbd.frame_type == KEY_FRAME)
|
||||
{
|
||||
@@ -211,6 +207,7 @@ static int pick_intra4x4block(
|
||||
|
||||
b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
|
||||
vp8_encode_intra4x4block(rtcd, x, be, b, b->bmi.mode);
|
||||
|
||||
return best_rd;
|
||||
}
|
||||
|
||||
@@ -220,17 +217,8 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
int i;
|
||||
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
|
||||
int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode
|
||||
int error;
|
||||
int distortion = 0;
|
||||
ENTROPY_CONTEXT_PLANES t_above, t_left;
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
|
||||
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
|
||||
ta = (ENTROPY_CONTEXT *)&t_above;
|
||||
tl = (ENTROPY_CONTEXT *)&t_left;
|
||||
|
||||
vp8_intra_prediction_down_copy(xd);
|
||||
|
||||
@@ -243,10 +231,8 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
|
||||
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
|
||||
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(d);
|
||||
|
||||
error += pick_intra4x4block(rtcd,
|
||||
mb, mb->block + i, xd->block + i, &best_mode, A, L,
|
||||
ta + vp8_block2above[i],
|
||||
tl + vp8_block2left[i], &r, &d);
|
||||
pick_intra4x4block(rtcd, mb, mb->block + i, xd->block + i,
|
||||
&best_mode, A, L, &r, &d);
|
||||
|
||||
cost += r;
|
||||
distortion += d;
|
||||
@@ -264,10 +250,15 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
|
||||
*Rate = cost;
|
||||
|
||||
if (i == 16)
|
||||
{
|
||||
*best_dist = distortion;
|
||||
error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, distortion);
|
||||
}
|
||||
else
|
||||
{
|
||||
*best_dist = INT_MAX;
|
||||
|
||||
error = INT_MAX;
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -421,7 +412,6 @@ int vp8_pick_intra_mbuv_mode(MACROBLOCK *mb)
|
||||
|
||||
}
|
||||
|
||||
|
||||
int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
|
||||
{
|
||||
BLOCK *b = &x->block[0];
|
||||
@@ -430,7 +420,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
B_MODE_INFO best_bmodes[16];
|
||||
MB_MODE_INFO best_mbmode;
|
||||
PARTITION_INFO best_partition;
|
||||
MV best_ref_mv1;
|
||||
MV best_ref_mv;
|
||||
MV mode_mv[MB_MODE_COUNT];
|
||||
MB_PREDICTION_MODE this_mode;
|
||||
int num00;
|
||||
@@ -448,9 +438,14 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
int best_mode_index = 0;
|
||||
int sse = INT_MAX;
|
||||
|
||||
MV mvp;
|
||||
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
int saddone=0;
|
||||
int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7)
|
||||
|
||||
MV nearest_mv[4];
|
||||
MV near_mv[4];
|
||||
MV best_ref_mv[4];
|
||||
MV frame_best_ref_mv[4];
|
||||
int MDCounts[4][4];
|
||||
unsigned char *y_buffer[4];
|
||||
unsigned char *u_buffer[4];
|
||||
@@ -470,7 +465,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
|
||||
|
||||
vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME],
|
||||
&best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
|
||||
&frame_best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
|
||||
|
||||
y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
|
||||
u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
|
||||
@@ -484,7 +479,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
|
||||
|
||||
vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME],
|
||||
&best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
|
||||
&frame_best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
|
||||
|
||||
y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
|
||||
u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
|
||||
@@ -498,7 +493,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
|
||||
|
||||
vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME],
|
||||
&best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
|
||||
&frame_best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
|
||||
|
||||
y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
|
||||
u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
|
||||
@@ -538,10 +533,6 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
+ vp8_cost_one(cpi->prob_gf_coded);
|
||||
}
|
||||
|
||||
|
||||
|
||||
best_rd = INT_MAX;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
|
||||
// if we encode a new mv this is important
|
||||
@@ -604,7 +595,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
best_ref_mv1 = best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
|
||||
}
|
||||
|
||||
@@ -617,6 +608,28 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
continue;
|
||||
}
|
||||
|
||||
if(cpi->sf.improved_mv_pred && x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
|
||||
{
|
||||
if(!saddone)
|
||||
{
|
||||
vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
|
||||
saddone = 1;
|
||||
}
|
||||
|
||||
vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
|
||||
|
||||
/* adjust mvp to make sure it is within MV range */
|
||||
if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL)
|
||||
mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL;
|
||||
else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL)
|
||||
mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL;
|
||||
if(mvp.col > best_ref_mv.col + MAX_FULL_PEL_VAL)
|
||||
mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL;
|
||||
else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL)
|
||||
mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL;
|
||||
}
|
||||
|
||||
switch (this_mode)
|
||||
{
|
||||
case B_PRED:
|
||||
@@ -672,61 +685,59 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
int n = 0;
|
||||
int sadpb = x->sadperbit16;
|
||||
|
||||
int col_min;
|
||||
int col_max;
|
||||
int row_min;
|
||||
int row_max;
|
||||
|
||||
int tmp_col_min = x->mv_col_min;
|
||||
int tmp_col_max = x->mv_col_max;
|
||||
int tmp_row_min = x->mv_row_min;
|
||||
int tmp_row_max = x->mv_row_max;
|
||||
|
||||
int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1;
|
||||
|
||||
// Further step/diamond searches as necessary
|
||||
if (cpi->Speed < 8)
|
||||
step_param = cpi->sf.first_step + speed_adjust;
|
||||
|
||||
if(cpi->sf.improved_mv_pred)
|
||||
{
|
||||
step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 1 : 0);
|
||||
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
|
||||
}
|
||||
else
|
||||
sr += speed_adjust;
|
||||
//adjust search range according to sr from mv prediction
|
||||
if(sr > step_param)
|
||||
step_param = sr;
|
||||
|
||||
col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
|
||||
col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
|
||||
row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
|
||||
row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
|
||||
|
||||
// Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
|
||||
if (x->mv_col_min < col_min )
|
||||
x->mv_col_min = col_min;
|
||||
if (x->mv_col_max > col_max )
|
||||
x->mv_col_max = col_max;
|
||||
if (x->mv_row_min < row_min )
|
||||
x->mv_row_min = row_min;
|
||||
if (x->mv_row_max > row_max )
|
||||
x->mv_row_max = row_max;
|
||||
}else
|
||||
{
|
||||
step_param = cpi->sf.first_step + 2;
|
||||
further_steps = 0;
|
||||
mvp.row = best_ref_mv.row;
|
||||
mvp.col = best_ref_mv.col;
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
// Initial step Search
|
||||
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost, &best_ref_mv1);
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
// Further step searches
|
||||
while (n < further_steps)
|
||||
{
|
||||
n++;
|
||||
|
||||
if (num00)
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost, &best_ref_mv1);
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
bestsme = thissme;
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
}
|
||||
else
|
||||
{
|
||||
d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
|
||||
d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
further_steps = (cpi->Speed >= 8)? 0: (cpi->sf.max_step_search_steps - 1 - step_param);
|
||||
|
||||
if (cpi->sf.search_method == HEX)
|
||||
{
|
||||
bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
|
||||
bestsme = vp8_hex_search(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
}
|
||||
else
|
||||
{
|
||||
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb < 9
|
||||
bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
@@ -745,7 +756,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb = 9
|
||||
thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
@@ -762,19 +773,24 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
if(cpi->sf.improved_mv_pred)
|
||||
{
|
||||
x->mv_col_min = tmp_col_min;
|
||||
x->mv_col_max = tmp_col_max;
|
||||
x->mv_row_min = tmp_row_min;
|
||||
x->mv_row_max = tmp_row_max;
|
||||
}
|
||||
|
||||
if (bestsme < INT_MAX)
|
||||
cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
|
||||
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
// mv cost;
|
||||
rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128);
|
||||
}
|
||||
|
||||
if (bestsme < INT_MAX)
|
||||
cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
|
||||
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
// mv cost;
|
||||
rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv1, cpi->mb.mvcost, 128);
|
||||
|
||||
|
||||
case NEARESTMV:
|
||||
case NEARMV:
|
||||
|
||||
|
@@ -296,7 +296,6 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
int filt_err = 0;
|
||||
int min_filter_level;
|
||||
int max_filter_level;
|
||||
int prediction_difference = (int)(100 * abs((int)(cpi->last_auto_filter_prediction_error - cpi->prediction_error)) / (1 + cpi->prediction_error));
|
||||
|
||||
int filter_step;
|
||||
int filt_high = 0;
|
||||
@@ -478,6 +477,5 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
||||
cpi->last_auto_filt_val = filt_best;
|
||||
cpi->last_auto_filt_q = cm->base_qindex;
|
||||
|
||||
cpi->last_auto_filter_prediction_error = cpi->prediction_error;
|
||||
cpi->frames_since_auto_filter = 0;
|
||||
}
|
||||
|
@@ -129,9 +129,6 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
rc = vp8_default_zig_zag1d[i];
|
||||
z = coeff_ptr[rc];
|
||||
|
||||
//if ( i == 0 )
|
||||
// zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
|
||||
//else
|
||||
zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
|
||||
|
||||
zbin_boost_ptr ++;
|
||||
@@ -144,13 +141,13 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
y = (((x * quant_ptr[rc]) >> 16) + x)
|
||||
>> quant_shift_ptr[rc]; // quantize (x)
|
||||
x = (y ^ sz) - sz; // get the sign back
|
||||
qcoeff_ptr[rc] = x; // write to destination
|
||||
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
|
||||
qcoeff_ptr[rc] = x; // write to destination
|
||||
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
|
||||
|
||||
if (y)
|
||||
{
|
||||
eob = i; // last nonzero coeffs
|
||||
zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength
|
||||
zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -43,7 +43,9 @@
|
||||
#endif
|
||||
|
||||
|
||||
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
|
||||
|
||||
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
|
||||
|
||||
@@ -241,10 +243,9 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
|
||||
cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
|
||||
}
|
||||
|
||||
if (cpi->RDMULT < 125)
|
||||
cpi->RDMULT = 125;
|
||||
|
||||
cpi->mb.errorperbit = (cpi->RDMULT / 100);
|
||||
cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
|
||||
|
||||
vp8_set_speed_features(cpi);
|
||||
|
||||
if (cpi->common.simpler_lpf)
|
||||
@@ -537,15 +538,79 @@ static int vp8_rdcost_mby(MACROBLOCK *mb)
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void macro_block_yrd( MACROBLOCK *mb,
|
||||
int *Rate,
|
||||
int *Distortion,
|
||||
const vp8_encodemb_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
int b;
|
||||
MACROBLOCKD *const x = &mb->e_mbd;
|
||||
BLOCK *const mb_y2 = mb->block + 24;
|
||||
BLOCKD *const x_y2 = x->block + 24;
|
||||
short *Y2DCPtr = mb_y2->src_diff;
|
||||
BLOCK *beptr;
|
||||
int d;
|
||||
|
||||
static void rd_pick_intra4x4block(
|
||||
ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, mb->src.y_buffer,
|
||||
mb->e_mbd.predictor, mb->src.y_stride );
|
||||
|
||||
// Fdct and building the 2nd order block
|
||||
for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
|
||||
{
|
||||
mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
|
||||
*Y2DCPtr++ = beptr->coeff[0];
|
||||
*Y2DCPtr++ = beptr->coeff[16];
|
||||
}
|
||||
|
||||
// 2nd order fdct
|
||||
mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
|
||||
|
||||
// Quantization
|
||||
for (b = 0; b < 16; b++)
|
||||
{
|
||||
mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
|
||||
}
|
||||
|
||||
// DC prediction and quantization of the 2nd order block
|
||||
mb->quantize_b(mb_y2, x_y2);
|
||||
|
||||
// Distortion
|
||||
d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
|
||||
d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
|
||||
|
||||
*Distortion = (d >> 4);
|
||||
|
||||
// rate
|
||||
*Rate = vp8_rdcost_mby(mb);
|
||||
}
|
||||
|
||||
// Copy a 4x4 block out of a predictor buffer that uses a 16-byte row stride
// into a packed 16-byte buffer (the four rows stored back to back).
//   predictor: first byte of the 4x4 block; rows are read at byte offsets
//              0, 16, 32 and 48 from this pointer.
//   dst:       receives the 16 block bytes in row-major order.
// The buffers must not overlap, since the rows are copied with memcpy.
static void save_predictor(unsigned char *predictor, unsigned char *dst)
{
    int r;

    for (r = 0; r < 4; r++)
    {
        memcpy(dst, predictor, 4);  // one 4-byte row of the block
        dst += 4;                   // packed destination: rows are contiguous
        predictor += 16;            // strided source: next row is 16 bytes on
    }
}
|
||||
// Inverse of the packed 4x4 save: write a packed 16-byte block back into a
// predictor buffer that uses a 16-byte row stride.
//   predictor: first byte of the 4x4 block to overwrite; rows are written at
//              byte offsets 0, 16, 32 and 48 from this pointer.
//   dst:       despite its name this is the SOURCE here: 16 packed bytes in
//              row-major order.
// Only 4 bytes per row are written; the other 12 bytes of each 16-byte row
// are left untouched. The buffers must not overlap (memcpy is used).
static void restore_predictor(unsigned char *predictor, unsigned char *dst)
{
    int r;

    for (r = 0; r < 4; r++)
    {
        memcpy(predictor, dst, 4);  // one 4-byte row of the block
        dst += 4;                   // packed source: rows are contiguous
        predictor += 16;            // strided destination: next row is 16 bytes on
    }
}
|
||||
static int rd_pick_intra4x4block(
|
||||
VP8_COMP *cpi,
|
||||
MACROBLOCK *x,
|
||||
BLOCK *be,
|
||||
BLOCKD *b,
|
||||
B_PREDICTION_MODE *best_mode,
|
||||
B_PREDICTION_MODE above,
|
||||
B_PREDICTION_MODE left,
|
||||
unsigned int *bmode_costs,
|
||||
ENTROPY_CONTEXT *a,
|
||||
ENTROPY_CONTEXT *l,
|
||||
|
||||
@@ -554,31 +619,27 @@ static void rd_pick_intra4x4block(
|
||||
int *bestdistortion)
|
||||
{
|
||||
B_PREDICTION_MODE mode;
|
||||
int best_rd = INT_MAX; // 1<<30
|
||||
int best_rd = INT_MAX;
|
||||
int rate = 0;
|
||||
int distortion;
|
||||
unsigned int *mode_costs;
|
||||
|
||||
ENTROPY_CONTEXT ta = *a, tempa = *a;
|
||||
ENTROPY_CONTEXT tl = *l, templ = *l;
|
||||
|
||||
|
||||
if (x->e_mbd.frame_type == KEY_FRAME)
|
||||
{
|
||||
mode_costs = x->bmode_costs[above][left];
|
||||
}
|
||||
else
|
||||
{
|
||||
mode_costs = x->inter_bmode_costs;
|
||||
}
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16);
|
||||
DECLARE_ALIGNED_ARRAY(16, short, dqcoeff, 16);
|
||||
|
||||
for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
|
||||
{
|
||||
int this_rd;
|
||||
int ratey;
|
||||
|
||||
rate = mode_costs[mode];
|
||||
vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, mode);
|
||||
rate = bmode_costs[mode];
|
||||
|
||||
vp8_predict_intra4x4(b, mode, b->predictor);
|
||||
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
|
||||
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b(be, b);
|
||||
|
||||
tempa = ta;
|
||||
templ = tl;
|
||||
@@ -598,25 +659,36 @@ static void rd_pick_intra4x4block(
|
||||
*best_mode = mode;
|
||||
*a = tempa;
|
||||
*l = templ;
|
||||
save_predictor(b->predictor, predictor);
|
||||
vpx_memcpy(dqcoeff, b->dqcoeff, 32);
|
||||
}
|
||||
}
|
||||
|
||||
b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
|
||||
vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode);
|
||||
|
||||
restore_predictor(b->predictor, predictor);
|
||||
vpx_memcpy(b->dqcoeff, dqcoeff, 32);
|
||||
|
||||
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(b->dqcoeff, b->diff, 32);
|
||||
RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
|
||||
|
||||
return best_rd;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion)
|
||||
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
|
||||
int *rate_y, int *Distortion, int best_rd)
|
||||
{
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
int i;
|
||||
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
|
||||
int distortion = 0;
|
||||
int tot_rate_y = 0;
|
||||
int total_rd = 0;
|
||||
ENTROPY_CONTEXT_PLANES t_above, t_left;
|
||||
ENTROPY_CONTEXT *ta;
|
||||
ENTROPY_CONTEXT *tl;
|
||||
unsigned int *bmode_costs;
|
||||
|
||||
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
|
||||
@@ -626,17 +698,25 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
|
||||
|
||||
vp8_intra_prediction_down_copy(xd);
|
||||
|
||||
bmode_costs = mb->inter_bmode_costs;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
MODE_INFO *const mic = xd->mode_info_context;
|
||||
const int mis = xd->mode_info_stride;
|
||||
const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
|
||||
const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
|
||||
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
|
||||
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
|
||||
|
||||
rd_pick_intra4x4block(
|
||||
cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
|
||||
if (mb->e_mbd.frame_type == KEY_FRAME)
|
||||
{
|
||||
const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
|
||||
const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
|
||||
|
||||
bmode_costs = mb->bmode_costs[A][L];
|
||||
}
|
||||
|
||||
total_rd += rd_pick_intra4x4block(
|
||||
cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
|
||||
ta + vp8_block2above[i],
|
||||
tl + vp8_block2left[i], &r, &ry, &d);
|
||||
|
||||
@@ -644,42 +724,43 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
|
||||
distortion += d;
|
||||
tot_rate_y += ry;
|
||||
mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode;
|
||||
|
||||
if(total_rd >= best_rd)
|
||||
break;
|
||||
}
|
||||
|
||||
if(total_rd >= best_rd)
|
||||
return INT_MAX;
|
||||
|
||||
*Rate = cost;
|
||||
*rate_y += tot_rate_y;
|
||||
*Distortion = distortion;
|
||||
|
||||
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
|
||||
}
|
||||
|
||||
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int *rate_y, int *Distortion)
|
||||
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
|
||||
MACROBLOCK *x,
|
||||
int *Rate,
|
||||
int *rate_y,
|
||||
int *Distortion)
|
||||
{
|
||||
|
||||
MB_PREDICTION_MODE mode;
|
||||
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
|
||||
int rate, ratey;
|
||||
unsigned int distortion;
|
||||
int distortion;
|
||||
int best_rd = INT_MAX;
|
||||
int this_rd;
|
||||
|
||||
//Y Search for 16x16 intra prediction mode
|
||||
for (mode = DC_PRED; mode <= TM_PRED; mode++)
|
||||
{
|
||||
int this_rd;
|
||||
int dummy;
|
||||
rate = 0;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
|
||||
rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
|
||||
vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x);
|
||||
|
||||
ratey = vp8_rdcost_mby(x);
|
||||
|
||||
rate += ratey;
|
||||
|
||||
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, x->src.y_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride, &distortion, &dummy);
|
||||
macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
|
||||
rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
|
||||
[x->e_mbd.mode_info_context->mbmi.mode];
|
||||
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
|
||||
|
||||
@@ -689,7 +770,7 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
|
||||
best_rd = this_rd;
|
||||
*Rate = rate;
|
||||
*rate_y = ratey;
|
||||
*Distortion = (int)distortion;
|
||||
*Distortion = distortion;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -697,7 +778,6 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
|
||||
return best_rd;
|
||||
}
|
||||
|
||||
|
||||
static int rd_cost_mbuv(MACROBLOCK *mb)
|
||||
{
|
||||
int b;
|
||||
@@ -725,15 +805,6 @@ static int rd_cost_mbuv(MACROBLOCK *mb)
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_get_mbuvrecon_error(const vp8_variance_rtcd_vtable_t *rtcd, const MACROBLOCK *x) // sum of squares
|
||||
{
|
||||
unsigned int sse0, sse1;
|
||||
int sum0, sum1;
|
||||
VARIANCE_INVOKE(rtcd, get8x8var)(x->src.u_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, x->e_mbd.dst.uv_stride, &sse0, &sum0);
|
||||
VARIANCE_INVOKE(rtcd, get8x8var)(x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride, &sse1, &sum1);
|
||||
return (sse0 + sse1);
|
||||
}
|
||||
|
||||
static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
|
||||
{
|
||||
vp8_build_uvmvs(&x->e_mbd, fullpixel);
|
||||
@@ -761,12 +832,17 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
|
||||
int this_rd;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
|
||||
vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
|
||||
vp8_build_intra_predictors_mbuv(&x->e_mbd);
|
||||
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
|
||||
x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
|
||||
x->src.uv_stride);
|
||||
vp8_transform_mbuv(x);
|
||||
vp8_quantize_mbuv(x);
|
||||
|
||||
rate_to = rd_cost_mbuv(x);
|
||||
rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
|
||||
|
||||
distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x);
|
||||
distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
|
||||
|
||||
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
|
||||
|
||||
@@ -938,48 +1014,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
|
||||
return distortion;
|
||||
}
|
||||
|
||||
static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp8_encodemb_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
int b;
|
||||
MACROBLOCKD *const x = &mb->e_mbd;
|
||||
BLOCK *const mb_y2 = mb->block + 24;
|
||||
BLOCKD *const x_y2 = x->block + 24;
|
||||
short *Y2DCPtr = mb_y2->src_diff;
|
||||
BLOCK *beptr;
|
||||
int d;
|
||||
|
||||
ENCODEMB_INVOKE(rtcd, submby)(mb->src_diff, mb->src.y_buffer, mb->e_mbd.predictor, mb->src.y_stride);
|
||||
|
||||
// Fdct and building the 2nd order block
|
||||
for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
|
||||
{
|
||||
mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
|
||||
*Y2DCPtr++ = beptr->coeff[0];
|
||||
*Y2DCPtr++ = beptr->coeff[16];
|
||||
}
|
||||
|
||||
// 2nd order fdct
|
||||
mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
|
||||
|
||||
// Quantization
|
||||
for (b = 0; b < 16; b++)
|
||||
{
|
||||
mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
|
||||
}
|
||||
|
||||
// DC prediction and Quantization of 2nd Order block
|
||||
mb->quantize_b(mb_y2, x_y2);
|
||||
|
||||
// Distortion
|
||||
d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2;
|
||||
d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff);
|
||||
|
||||
*Distortion = (d >> 4);
|
||||
|
||||
// rate
|
||||
*Rate = vp8_rdcost_mby(mb);
|
||||
}
|
||||
|
||||
unsigned char vp8_mbsplit_offset2[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
@@ -1140,7 +1174,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
|
||||
|
||||
if (cpi->sf.search_method == HEX)
|
||||
bestsme = vp8_hex_search(x, c, e, bsi->ref_mv,
|
||||
&mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost);
|
||||
&mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
|
||||
|
||||
else
|
||||
{
|
||||
@@ -1420,48 +1454,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
|
||||
return bsi.segment_rd;
|
||||
}
|
||||
|
||||
|
||||
static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
MV xmv;
|
||||
xmv = x->mbmi.mv.as_mv;
|
||||
|
||||
if (ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
xmv.row *= -1;
|
||||
xmv.col *= -1;
|
||||
}
|
||||
|
||||
mvp->as_mv = xmv;
|
||||
}
|
||||
|
||||
static void lf_mv_bias(const int lf_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
MV xmv;
|
||||
xmv = mvp->as_mv;
|
||||
|
||||
if (lf_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
xmv.row *= -1;
|
||||
xmv.col *= -1;
|
||||
}
|
||||
|
||||
mvp->as_mv = xmv;
|
||||
}
|
||||
|
||||
static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void swap(int *x,int *y)
|
||||
{
|
||||
@@ -1546,7 +1539,7 @@ static void quicksortsad(int arr[],int idx[], int left, int right)
|
||||
}
|
||||
|
||||
//The improved MV prediction
|
||||
static void vp8_mv_pred
|
||||
void vp8_mv_pred
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
MACROBLOCKD *xd,
|
||||
@@ -1561,67 +1554,67 @@ static void vp8_mv_pred
|
||||
const MODE_INFO *above = here - xd->mode_info_stride;
|
||||
const MODE_INFO *left = here - 1;
|
||||
const MODE_INFO *aboveleft = above - 1;
|
||||
int_mv near_mvs[7];
|
||||
int near_ref[7];
|
||||
int_mv near_mvs[8];
|
||||
int near_ref[8];
|
||||
int_mv mv;
|
||||
int vcnt=0;
|
||||
int find=0;
|
||||
int mb_offset;
|
||||
|
||||
int mvx[7];
|
||||
int mvy[7];
|
||||
int mvx[8];
|
||||
int mvy[8];
|
||||
int i;
|
||||
|
||||
mv.as_int = 0;
|
||||
|
||||
if(here->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = 0;
|
||||
near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = 0;
|
||||
near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
|
||||
near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
|
||||
|
||||
// read in 3 nearby block's MVs from current frame as prediction candidates.
|
||||
if (above->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(above, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = above->mbmi.ref_frame;
|
||||
}
|
||||
vcnt++;
|
||||
if (left->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(left, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = left->mbmi.ref_frame;
|
||||
}
|
||||
vcnt++;
|
||||
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(aboveleft, refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = aboveleft->mbmi.ref_frame;
|
||||
}
|
||||
vcnt++;
|
||||
|
||||
// read in 4 nearby block's MVs from last frame.
|
||||
// read in 5 nearby block's MVs from last frame.
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
{
|
||||
mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride) + (-xd->mb_to_left_edge/128 +1) ;
|
||||
mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
|
||||
|
||||
// current in last frame
|
||||
if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
|
||||
lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset];
|
||||
}
|
||||
vcnt++;
|
||||
|
||||
// above in last frame
|
||||
if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride] != INTRA_FRAME)
|
||||
if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride].as_int;
|
||||
lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - xd->mode_info_stride];
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
|
||||
mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
|
||||
}
|
||||
vcnt++;
|
||||
|
||||
@@ -1629,17 +1622,26 @@ static void vp8_mv_pred
|
||||
if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
|
||||
lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1];
|
||||
}
|
||||
vcnt++;
|
||||
|
||||
// aboveleft in last frame
|
||||
if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride -1] != INTRA_FRAME)
|
||||
// right in last frame
|
||||
if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride -1].as_int;
|
||||
lf_mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - xd->mode_info_stride -1];
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
|
||||
mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset +1];
|
||||
}
|
||||
vcnt++;
|
||||
|
||||
// below in last frame
|
||||
if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
|
||||
{
|
||||
near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
|
||||
mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
|
||||
near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
|
||||
}
|
||||
vcnt++;
|
||||
}
|
||||
@@ -1652,9 +1654,7 @@ static void vp8_mv_pred
|
||||
{
|
||||
mv.as_int = near_mvs[near_sadidx[i]].as_int;
|
||||
find = 1;
|
||||
if(vcnt<2)
|
||||
*sr = 4;
|
||||
else if (vcnt<4)
|
||||
if (i < 3)
|
||||
*sr = 3;
|
||||
else
|
||||
*sr = 2;
|
||||
@@ -1687,6 +1687,62 @@ static void vp8_mv_pred
|
||||
vp8_clamp_mv(mvp, xd);
|
||||
}
|
||||
|
||||
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
|
||||
{
|
||||
|
||||
int near_sad[8] = {0}; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
|
||||
|
||||
//calculate sad for current frame 3 nearby MBs.
|
||||
if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
|
||||
{
|
||||
near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
|
||||
}else if(xd->mb_to_top_edge==0)
|
||||
{ //only has left MB for sad calculation.
|
||||
near_sad[0] = near_sad[2] = INT_MAX;
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
|
||||
}else if(xd->mb_to_left_edge ==0)
|
||||
{ //only has above MB for sad calculation.
|
||||
near_sad[1] = near_sad[2] = INT_MAX;
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
|
||||
}else
|
||||
{
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
|
||||
near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
{
|
||||
//calculate sad for last frame 5 nearby MBs.
|
||||
unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
|
||||
int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
|
||||
|
||||
if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
|
||||
if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
|
||||
if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
|
||||
if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
|
||||
|
||||
if(near_sad[4] != INT_MAX)
|
||||
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
|
||||
if(near_sad[5] != INT_MAX)
|
||||
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
|
||||
if(near_sad[6] != INT_MAX)
|
||||
near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff);
|
||||
if(near_sad[7] != INT_MAX)
|
||||
near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
{
|
||||
quicksortsad(near_sad, near_sadidx, 0, 7);
|
||||
}else
|
||||
{
|
||||
quicksortsad(near_sad, near_sadidx, 0, 2);
|
||||
}
|
||||
}
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
|
||||
{
|
||||
BLOCK *b = &x->block[0];
|
||||
@@ -1724,8 +1780,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
int force_no_skip = 0;
|
||||
|
||||
MV mvp;
|
||||
int near_sad[7]; // 0-cf above, 1-cf left, 2-cf aboveleft, 3-lf current, 4-lf above, 5-lf left, 6-lf aboveleft
|
||||
int near_sadidx[7] = {0, 1, 2, 3, 4, 5, 6};
|
||||
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
int saddone=0;
|
||||
int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7)
|
||||
|
||||
@@ -1871,67 +1926,11 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame];
|
||||
}
|
||||
|
||||
|
||||
if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
|
||||
{
|
||||
if(!saddone)
|
||||
{
|
||||
//calculate sad for current frame 3 nearby MBs.
|
||||
if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
|
||||
{
|
||||
near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
|
||||
}else if(xd->mb_to_top_edge==0)
|
||||
{ //only has left MB for sad calculation.
|
||||
near_sad[0] = near_sad[2] = INT_MAX;
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
|
||||
}else if(xd->mb_to_left_edge ==0)
|
||||
{ //only has above MB for sad calculation.
|
||||
near_sad[1] = near_sad[2] = INT_MAX;
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
|
||||
}else
|
||||
{
|
||||
near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff);
|
||||
near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff);
|
||||
near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
{
|
||||
//calculate sad for last frame 4 nearby MBs.
|
||||
unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
|
||||
int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
|
||||
|
||||
if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
|
||||
{
|
||||
near_sad[4] = near_sad[5] = near_sad[6] = INT_MAX;
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
|
||||
}else if(xd->mb_to_top_edge==0)
|
||||
{ //only has left MB for sad calculation.
|
||||
near_sad[4] = near_sad[6] = INT_MAX;
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
|
||||
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
|
||||
}else if(xd->mb_to_left_edge ==0)
|
||||
{ //only has above MB for sad calculation.
|
||||
near_sad[5] = near_sad[6] = INT_MAX;
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
|
||||
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
|
||||
}else
|
||||
{
|
||||
near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff);
|
||||
near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff);
|
||||
near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff);
|
||||
near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16 -16, pre_y_stride, 0x7fffffff);
|
||||
}
|
||||
}
|
||||
|
||||
if(cpi->common.last_frame_type != KEY_FRAME)
|
||||
{
|
||||
quicksortsad(near_sad, near_sadidx, 0, 6);
|
||||
}else
|
||||
{
|
||||
quicksortsad(near_sad, near_sadidx, 0, 2);
|
||||
}
|
||||
|
||||
vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
|
||||
saddone = 1;
|
||||
}
|
||||
|
||||
@@ -1990,27 +1989,34 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
|
||||
}
|
||||
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
vp8_update_zbin_extra(cpi, x);
|
||||
}
|
||||
|
||||
switch (this_mode)
|
||||
{
|
||||
case B_PRED:
|
||||
{
|
||||
int tmp_rd;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
|
||||
}
|
||||
// Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
|
||||
vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion);
|
||||
tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
|
||||
rate2 += rate;
|
||||
|
||||
distortion2 += distortion;
|
||||
rate2 += uv_intra_rate;
|
||||
rate_uv = uv_intra_rate_tokenonly;
|
||||
distortion2 += uv_intra_distortion;
|
||||
distortion_uv = uv_intra_distortion;
|
||||
break;
|
||||
|
||||
if(tmp_rd < best_yrd)
|
||||
{
|
||||
rate2 += uv_intra_rate;
|
||||
rate_uv = uv_intra_rate_tokenonly;
|
||||
distortion2 += uv_intra_distortion;
|
||||
distortion_uv = uv_intra_distortion;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_rd = INT_MAX;
|
||||
disable_skip = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case SPLITMV:
|
||||
{
|
||||
@@ -2046,22 +2052,16 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
case V_PRED:
|
||||
case H_PRED:
|
||||
case TM_PRED:
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
|
||||
}
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
{
|
||||
macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
|
||||
rate2 += rate_y;
|
||||
distortion2 += distortion;
|
||||
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
|
||||
rate2 += uv_intra_rate;
|
||||
rate_uv = uv_intra_rate_tokenonly;
|
||||
distortion2 += uv_intra_distortion;
|
||||
distortion_uv = uv_intra_distortion;
|
||||
}
|
||||
macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
|
||||
rate2 += rate_y;
|
||||
distortion2 += distortion;
|
||||
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
|
||||
rate2 += uv_intra_rate;
|
||||
rate_uv = uv_intra_rate_tokenonly;
|
||||
distortion2 += uv_intra_distortion;
|
||||
distortion_uv = uv_intra_distortion;
|
||||
break;
|
||||
|
||||
case NEWMV:
|
||||
@@ -2116,7 +2116,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
|
||||
if (cpi->sf.search_method == HEX)
|
||||
{
|
||||
bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
|
||||
bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
}
|
||||
@@ -2266,22 +2266,28 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
else if (x->encode_breakout)
|
||||
{
|
||||
int sum, sse;
|
||||
int threshold = (xd->block[0].dequant[1]
|
||||
* xd->block[0].dequant[1] >>4);
|
||||
|
||||
if(threshold < x->encode_breakout)
|
||||
threshold = x->encode_breakout;
|
||||
|
||||
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
|
||||
(x->src.y_buffer, x->src.y_stride,
|
||||
x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
|
||||
|
||||
if (sse < x->encode_breakout)
|
||||
if (sse < threshold)
|
||||
{
|
||||
// Check u and v to make sure skip is ok
|
||||
int sse2 = 0;
|
||||
|
||||
// add dc check
|
||||
if (abs(sum) < (cpi->common.Y2dequant[0][0] << 2))
|
||||
/* If there is no codeable 2nd order dc
|
||||
or a very small uniform pixel change */
|
||||
if (abs(sum) < (xd->block[24].dequant[0]<<2)||
|
||||
((sum * sum>>8) > sse && abs(sum) <128))
|
||||
{
|
||||
sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
if (sse2 * 2 < x->encode_breakout)
|
||||
if (sse2 * 2 < threshold)
|
||||
{
|
||||
x->skip = 1;
|
||||
distortion2 = sse + sse2;
|
||||
@@ -2427,6 +2433,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
|
||||
if (x->skip)
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
// Reduce the activation RD thresholds for the best choice mode
|
||||
@@ -2497,6 +2504,15 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
}
|
||||
|
||||
|
||||
if(best_mbmode.mode <= B_PRED)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
best_bmodes[i].mv.as_int = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// macroblock modes
|
||||
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
|
||||
vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
|
||||
@@ -2511,4 +2527,3 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
return best_rd;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -12,10 +12,22 @@
|
||||
#ifndef __INC_RDOPT_H
|
||||
#define __INC_RDOPT_H
|
||||
void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
|
||||
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion);
|
||||
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd);
|
||||
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion);
|
||||
int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion);
|
||||
extern int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
|
||||
|
||||
extern void vp8_mv_pred
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
MV *mvp,
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias,
|
||||
int *sr,
|
||||
int near_sadidx[]
|
||||
);
|
||||
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]);
|
||||
|
||||
#endif
|
||||
|
@@ -70,7 +70,7 @@ static void vp8_temporal_filter_predictors_mb_c
|
||||
// U & V
|
||||
mv_row >>= 1;
|
||||
mv_col >>= 1;
|
||||
stride >>= 1;
|
||||
stride = (stride + 1) >> 1;
|
||||
offset = (mv_row >> 3) * stride + (mv_col >> 3);
|
||||
uptr = u_mb_ptr + offset;
|
||||
vptr = v_mb_ptr + offset;
|
||||
@@ -204,7 +204,7 @@ static int vp8_temporal_filter_find_matching_mb_c
|
||||
step_param,
|
||||
sadpb/*x->errorperbit*/,
|
||||
&num00, &cpi->fn_ptr[BLOCK_16X16],
|
||||
mvsadcost, mvcost);
|
||||
mvsadcost, mvcost, &best_ref_mv1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -11,220 +11,169 @@
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
|
||||
; short *qcoeff_ptr,short *dequant_ptr,
|
||||
; const int *default_zig_zag, short *round_ptr,
|
||||
; short *quant_ptr, short *dqcoeff_ptr,
|
||||
;int vp8_regular_quantize_b_impl_sse2(
|
||||
; short *coeff_ptr,
|
||||
; short *zbin_ptr,
|
||||
; short *qcoeff_ptr,
|
||||
; short *dequant_ptr,
|
||||
; const int *default_zig_zag,
|
||||
; short *round_ptr,
|
||||
; short *quant_ptr,
|
||||
; short *dqcoeff_ptr,
|
||||
; unsigned short zbin_oq_value,
|
||||
; short *zbin_boost_ptr);
|
||||
; short *zbin_boost_ptr,
|
||||
; short *quant_shift);
|
||||
;
|
||||
global sym(vp8_regular_quantize_b_impl_sse2)
|
||||
sym(vp8_regular_quantize_b_impl_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 10
|
||||
SHADOW_ARGS_TO_STACK 11
|
||||
SAVE_XMM
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
ALIGN_STACK 16, rax
|
||||
%define abs_minus_zbin 0
|
||||
%define temp_qcoeff 32
|
||||
%define qcoeff 64
|
||||
%define eob_tmp 96
|
||||
%define stack_size 112
|
||||
sub rsp, stack_size
|
||||
; end prolog
|
||||
|
||||
ALIGN_STACK 16, rax
|
||||
|
||||
%define abs_minus_zbin_lo 0
|
||||
%define abs_minus_zbin_hi 16
|
||||
%define temp_qcoeff_lo 32
|
||||
%define temp_qcoeff_hi 48
|
||||
%define save_xmm6 64
|
||||
%define save_xmm7 80
|
||||
%define eob 96
|
||||
|
||||
%define vp8_regularquantizeb_stack_size eob + 16
|
||||
|
||||
sub rsp, vp8_regularquantizeb_stack_size
|
||||
|
||||
movdqa OWORD PTR[rsp + save_xmm6], xmm6
|
||||
movdqa OWORD PTR[rsp + save_xmm7], xmm7
|
||||
|
||||
mov rdx, arg(0) ;coeff_ptr
|
||||
mov eax, arg(8) ;zbin_oq_value
|
||||
|
||||
mov rcx, arg(1) ;zbin_ptr
|
||||
movd xmm7, eax
|
||||
mov rdx, arg(0) ; coeff_ptr
|
||||
mov rcx, arg(1) ; zbin_ptr
|
||||
movd xmm7, arg(8) ; zbin_oq_value
|
||||
mov rdi, arg(5) ; round_ptr
|
||||
mov rsi, arg(6) ; quant_ptr
|
||||
|
||||
; z
|
||||
movdqa xmm0, OWORD PTR[rdx]
|
||||
movdqa xmm4, OWORD PTR[rdx + 16]
|
||||
|
||||
pshuflw xmm7, xmm7, 0
|
||||
punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
|
||||
psraw xmm0, 15 ;sign of z (aka sz)
|
||||
psraw xmm4, 15 ;sign of z (aka sz)
|
||||
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
|
||||
movdqa xmm2, OWORD PTR[rcx] ;load zbin_ptr
|
||||
movdqa xmm3, OWORD PTR[rcx + 16] ;load zbin_ptr
|
||||
|
||||
pshuflw xmm7, xmm7, 0
|
||||
psubw xmm1, xmm0 ;x = abs(z)
|
||||
|
||||
punpcklwd xmm7, xmm7 ;duplicated zbin_oq_value
|
||||
psubw xmm5, xmm4 ;x = abs(z)
|
||||
|
||||
paddw xmm2, xmm7
|
||||
paddw xmm3, xmm7
|
||||
|
||||
psubw xmm1, xmm2 ;sub (zbin_ptr + zbin_oq_value)
|
||||
psubw xmm5, xmm3 ;sub (zbin_ptr + zbin_oq_value)
|
||||
|
||||
mov rdi, arg(5) ;round_ptr
|
||||
mov rsi, arg(6) ;quant_ptr
|
||||
|
||||
movdqa OWORD PTR[rsp + abs_minus_zbin_lo], xmm1
|
||||
movdqa OWORD PTR[rsp + abs_minus_zbin_hi], xmm5
|
||||
|
||||
paddw xmm1, xmm2 ;add (zbin_ptr + zbin_oq_value) back
|
||||
paddw xmm5, xmm3 ;add (zbin_ptr + zbin_oq_value) back
|
||||
|
||||
movdqa xmm2, OWORD PTR[rdi]
|
||||
movdqa xmm3, OWORD PTR[rsi]
|
||||
|
||||
movdqa xmm6, OWORD PTR[rdi + 16]
|
||||
movdqa xmm7, OWORD PTR[rsi + 16]
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm6
|
||||
|
||||
pmulhw xmm1, xmm3
|
||||
pmulhw xmm5, xmm7
|
||||
|
||||
mov rsi, arg(2) ;qcoeff_ptr
|
||||
pxor xmm6, xmm6
|
||||
; sz
|
||||
psraw xmm0, 15
|
||||
psraw xmm4, 15
|
||||
|
||||
; (z ^ sz)
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
|
||||
; x = abs(z)
|
||||
psubw xmm1, xmm0
|
||||
psubw xmm5, xmm4
|
||||
|
||||
movdqa OWORD PTR[rsp + temp_qcoeff_lo], xmm1
|
||||
movdqa OWORD PTR[rsp + temp_qcoeff_hi], xmm5
|
||||
movdqa xmm2, OWORD PTR[rcx]
|
||||
movdqa xmm3, OWORD PTR[rcx + 16]
|
||||
|
||||
movdqa OWORD PTR[rsi], xmm6 ;zero qcoeff
|
||||
movdqa OWORD PTR[rsi + 16], xmm6 ;zero qcoeff
|
||||
; *zbin_ptr + zbin_oq_value
|
||||
paddw xmm2, xmm7
|
||||
paddw xmm3, xmm7
|
||||
|
||||
xor rax, rax
|
||||
mov rcx, -1
|
||||
; x - (*zbin_ptr + zbin_oq_value)
|
||||
psubw xmm1, xmm2
|
||||
psubw xmm5, xmm3
|
||||
movdqa OWORD PTR[rsp + abs_minus_zbin], xmm1
|
||||
movdqa OWORD PTR[rsp + abs_minus_zbin + 16], xmm5
|
||||
|
||||
mov [rsp + eob], rcx
|
||||
mov rsi, arg(9) ;zbin_boost_ptr
|
||||
|
||||
mov rbx, arg(4) ;default_zig_zag
|
||||
|
||||
rq_zigzag_loop:
|
||||
movsxd rcx, DWORD PTR[rbx + rax*4] ;now we have rc
|
||||
movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
|
||||
lea rsi, [rsi + 2] ;zbin_boost_ptr++
|
||||
|
||||
movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
|
||||
|
||||
sub edx, edi ;x - zbin
|
||||
jl rq_zigzag_1
|
||||
|
||||
mov rdi, arg(2) ;qcoeff_ptr
|
||||
|
||||
movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
|
||||
|
||||
cmp edx, 0
|
||||
je rq_zigzag_1
|
||||
|
||||
mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
|
||||
|
||||
mov rsi, arg(9) ;zbin_boost_ptr
|
||||
mov [rsp + eob], rax ;eob = i
|
||||
|
||||
rq_zigzag_1:
|
||||
movsxd rcx, DWORD PTR[rbx + rax*4 + 4]
|
||||
movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
|
||||
lea rsi, [rsi + 2] ;zbin_boost_ptr++
|
||||
|
||||
movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
|
||||
lea rax, [rax + 1]
|
||||
|
||||
sub edx, edi ;x - zbin
|
||||
jl rq_zigzag_1a
|
||||
|
||||
mov rdi, arg(2) ;qcoeff_ptr
|
||||
|
||||
movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
|
||||
|
||||
cmp edx, 0
|
||||
je rq_zigzag_1a
|
||||
|
||||
mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
|
||||
|
||||
mov rsi, arg(9) ;zbin_boost_ptr
|
||||
mov [rsp + eob], rax ;eob = i
|
||||
|
||||
rq_zigzag_1a:
|
||||
movsxd rcx, DWORD PTR[rbx + rax*4 + 4]
|
||||
movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
|
||||
lea rsi, [rsi + 2] ;zbin_boost_ptr++
|
||||
|
||||
movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
|
||||
lea rax, [rax + 1]
|
||||
|
||||
sub edx, edi ;x - zbin
|
||||
jl rq_zigzag_1b
|
||||
|
||||
mov rdi, arg(2) ;qcoeff_ptr
|
||||
|
||||
movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
|
||||
|
||||
cmp edx, 0
|
||||
je rq_zigzag_1b
|
||||
|
||||
mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
|
||||
|
||||
mov rsi, arg(9) ;zbin_boost_ptr
|
||||
mov [rsp + eob], rax ;eob = i
|
||||
|
||||
rq_zigzag_1b:
|
||||
movsxd rcx, DWORD PTR[rbx + rax*4 + 4]
|
||||
movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin
|
||||
lea rsi, [rsi + 2] ;zbin_boost_ptr++
|
||||
|
||||
movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
|
||||
lea rax, [rax + 1]
|
||||
|
||||
sub edx, edi ;x - zbin
|
||||
jl rq_zigzag_1c
|
||||
|
||||
mov rdi, arg(2) ;qcoeff_ptr
|
||||
|
||||
movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
|
||||
|
||||
cmp edx, 0
|
||||
je rq_zigzag_1c
|
||||
|
||||
mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
|
||||
|
||||
mov rsi, arg(9) ;zbin_boost_ptr
|
||||
mov [rsp + eob], rax ;eob = i
|
||||
|
||||
rq_zigzag_1c:
|
||||
lea rax, [rax + 1]
|
||||
|
||||
cmp rax, 16
|
||||
jl rq_zigzag_loop
|
||||
|
||||
mov rdi, arg(2) ;qcoeff_ptr
|
||||
mov rcx, arg(3) ;dequant_ptr
|
||||
mov rsi, arg(7) ;dqcoeff_ptr
|
||||
; add (zbin_ptr + zbin_oq_value) back
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm3
|
||||
|
||||
movdqa xmm2, OWORD PTR[rdi]
|
||||
movdqa xmm3, OWORD PTR[rdi + 16]
|
||||
movdqa xmm6, OWORD PTR[rdi + 16]
|
||||
|
||||
movdqa xmm3, OWORD PTR[rsi]
|
||||
movdqa xmm7, OWORD PTR[rsi + 16]
|
||||
|
||||
; x + round
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm6
|
||||
|
||||
; y = x * quant_ptr >> 16
|
||||
pmulhw xmm3, xmm1
|
||||
pmulhw xmm7, xmm5
|
||||
|
||||
; y += x
|
||||
paddw xmm1, xmm3
|
||||
paddw xmm5, xmm7
|
||||
|
||||
movdqa OWORD PTR[rsp + temp_qcoeff], xmm1
|
||||
movdqa OWORD PTR[rsp + temp_qcoeff + 16], xmm5
|
||||
|
||||
pxor xmm6, xmm6
|
||||
; zero qcoeff
|
||||
movdqa OWORD PTR[rsp + qcoeff], xmm6
|
||||
movdqa OWORD PTR[rsp + qcoeff + 16], xmm6
|
||||
|
||||
mov [rsp + eob_tmp], DWORD -1 ; eob
|
||||
mov rsi, arg(9) ; zbin_boost_ptr
|
||||
mov rdi, arg(4) ; default_zig_zag
|
||||
mov rax, arg(10) ; quant_shift_ptr
|
||||
|
||||
%macro ZIGZAG_LOOP 2
|
||||
rq_zigzag_loop_%1:
|
||||
movsxd rdx, DWORD PTR[rdi + (%1 * 4)] ; rc
|
||||
movsx ebx, WORD PTR [rsi] ; *zbin_boost_ptr
|
||||
lea rsi, [rsi + 2] ; zbin_boost_ptr++
|
||||
|
||||
; x
|
||||
movsx ecx, WORD PTR[rsp + abs_minus_zbin + rdx *2]
|
||||
|
||||
; if (x >= zbin)
|
||||
sub ecx, ebx ; x - zbin
|
||||
jl rq_zigzag_loop_%2 ; x < zbin
|
||||
|
||||
movsx ebx, WORD PTR[rsp + temp_qcoeff + rdx *2]
|
||||
|
||||
; downshift by quant_shift[rdx]
|
||||
movsx ecx, WORD PTR[rax + rdx*2] ; quant_shift_ptr[rc]
|
||||
sar ebx, cl ; also sets Z bit
|
||||
je rq_zigzag_loop_%2 ; !y
|
||||
mov WORD PTR[rsp + qcoeff + rdx * 2], bx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
|
||||
|
||||
mov rsi, arg(9) ; reset to b->zrun_zbin_boost
|
||||
mov [rsp + eob_tmp], DWORD %1 ; eob = i
|
||||
%endmacro
|
||||
ZIGZAG_LOOP 0, 1
|
||||
ZIGZAG_LOOP 1, 2
|
||||
ZIGZAG_LOOP 2, 3
|
||||
ZIGZAG_LOOP 3, 4
|
||||
ZIGZAG_LOOP 4, 5
|
||||
ZIGZAG_LOOP 5, 6
|
||||
ZIGZAG_LOOP 6, 7
|
||||
ZIGZAG_LOOP 7, 8
|
||||
ZIGZAG_LOOP 8, 9
|
||||
ZIGZAG_LOOP 9, 10
|
||||
ZIGZAG_LOOP 10, 11
|
||||
ZIGZAG_LOOP 11, 12
|
||||
ZIGZAG_LOOP 12, 13
|
||||
ZIGZAG_LOOP 13, 14
|
||||
ZIGZAG_LOOP 14, 15
|
||||
ZIGZAG_LOOP 15, end
|
||||
rq_zigzag_loop_end:
|
||||
|
||||
mov rbx, arg(2) ; qcoeff_ptr
|
||||
mov rcx, arg(3) ; dequant_ptr
|
||||
mov rsi, arg(7) ; dqcoeff_ptr
|
||||
mov rax, [rsp + eob_tmp] ; eob
|
||||
|
||||
movdqa xmm2, OWORD PTR[rsp + qcoeff]
|
||||
movdqa xmm3, OWORD PTR[rsp + qcoeff + 16]
|
||||
|
||||
; y ^ sz
|
||||
pxor xmm2, xmm0
|
||||
pxor xmm3, xmm4
|
||||
; x = (y ^ sz) - sz
|
||||
psubw xmm2, xmm0
|
||||
psubw xmm3, xmm4
|
||||
|
||||
movdqa xmm0, OWORD PTR[rcx]
|
||||
movdqa xmm1, OWORD PTR[rcx + 16]
|
||||
@@ -232,23 +181,20 @@ rq_zigzag_1c:
|
||||
pmullw xmm0, xmm2
|
||||
pmullw xmm1, xmm3
|
||||
|
||||
movdqa OWORD PTR[rsi], xmm0 ;store dqcoeff
|
||||
movdqa OWORD PTR[rsi + 16], xmm1 ;store dqcoeff
|
||||
|
||||
mov rax, [rsp + eob]
|
||||
|
||||
movdqa xmm6, OWORD PTR[rsp + save_xmm6]
|
||||
movdqa xmm7, OWORD PTR[rsp + save_xmm7]
|
||||
movdqa OWORD PTR[rbx], xmm2
|
||||
movdqa OWORD PTR[rbx + 16], xmm3
|
||||
movdqa OWORD PTR[rsi], xmm0 ; store dqcoeff
|
||||
movdqa OWORD PTR[rsi + 16], xmm1 ; store dqcoeff
|
||||
|
||||
add rax, 1
|
||||
|
||||
add rsp, vp8_regularquantizeb_stack_size
|
||||
pop rsp
|
||||
|
||||
; begin epilog
|
||||
add rsp, stack_size
|
||||
pop rsp
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
0
vp8/encoder/x86/quantize_ssse3.asm
Executable file → Normal file
0
vp8/encoder/x86/quantize_ssse3.asm
Executable file → Normal file
@@ -27,11 +27,11 @@ extern prototype_quantize_block(vp8_regular_quantize_b_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
/* The sse2 quantizer has not been updated to match the new exact
|
||||
* quantizer introduced in commit e04e2935
|
||||
*#undef vp8_quantize_quantb
|
||||
*#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
|
||||
*/
|
||||
// Currently, this function realizes a gain on x86 and a loss on x86_64
|
||||
#if ARCH_X86
|
||||
#undef vp8_quantize_quantb
|
||||
#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -586,52 +586,45 @@ sym(vp8_sad16x16_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
lea end_ptr, [src_ptr+src_stride*8]
|
||||
|
||||
lea end_ptr, [end_ptr+src_stride*8]
|
||||
pxor mm7, mm7
|
||||
mov end_ptr, 4
|
||||
pxor xmm7, xmm7
|
||||
|
||||
.vp8_sad16x16_sse3_loop:
|
||||
|
||||
movq ret_var, mm7
|
||||
cmp ret_var, max_err
|
||||
jg .vp8_sad16x16_early_exit
|
||||
|
||||
movq mm0, QWORD PTR [src_ptr]
|
||||
movq mm2, QWORD PTR [src_ptr+8]
|
||||
|
||||
movq mm1, QWORD PTR [ref_ptr]
|
||||
movq mm3, QWORD PTR [ref_ptr+8]
|
||||
|
||||
movq mm4, QWORD PTR [src_ptr+src_stride]
|
||||
movq mm5, QWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
psadbw mm0, mm1
|
||||
psadbw mm2, mm3
|
||||
|
||||
movq mm1, QWORD PTR [src_ptr+src_stride+8]
|
||||
movq mm3, QWORD PTR [ref_ptr+ref_stride+8]
|
||||
|
||||
psadbw mm4, mm5
|
||||
psadbw mm1, mm3
|
||||
movdqa xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
paddw mm0, mm2
|
||||
paddw mm4, mm1
|
||||
movdqa xmm4, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [ref_ptr]
|
||||
movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
|
||||
|
||||
paddw mm7, mm0
|
||||
paddw mm7, mm4
|
||||
psadbw xmm0, xmm1
|
||||
|
||||
cmp src_ptr, end_ptr
|
||||
movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
|
||||
|
||||
psadbw xmm2, xmm3
|
||||
psadbw xmm4, xmm5
|
||||
psadbw xmm6, xmm1
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*2]
|
||||
lea ref_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
paddw xmm7, xmm0
|
||||
paddw xmm7, xmm2
|
||||
paddw xmm7, xmm4
|
||||
paddw xmm7, xmm6
|
||||
|
||||
sub end_ptr, 1
|
||||
jne .vp8_sad16x16_sse3_loop
|
||||
|
||||
movq ret_var, mm7
|
||||
|
||||
.vp8_sad16x16_early_exit:
|
||||
|
||||
mov rax, ret_var
|
||||
movq xmm0, xmm7
|
||||
psrldq xmm7, 8
|
||||
paddw xmm0, xmm7
|
||||
movq rax, xmm0
|
||||
|
||||
STACK_FRAME_DESTROY_X3
|
||||
|
||||
|
@@ -493,8 +493,8 @@ sym(vp8_get8x8var_sse2):
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; unsigned short *HFilter,
|
||||
; unsigned short *VFilter,
|
||||
; int xoffset,
|
||||
; int yoffset,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared;;
|
||||
;
|
||||
@@ -504,68 +504,80 @@ sym(vp8_filter_block2d_bil_var_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
sub rsp, 16
|
||||
push rbx
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ;
|
||||
pxor xmm7, xmm7 ;
|
||||
mov rax, arg(5) ;HFilter ;
|
||||
|
||||
mov rdx, arg(6) ;VFilter ;
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding
|
||||
movdqa xmm4, XMMWORD PTR [rsi]
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
lea rcx, [GLOBAL(vp8_bilinear_filters_sse2)]
|
||||
movsxd rax, dword ptr arg(5) ; xoffset
|
||||
|
||||
cmp rax, 0 ; skip first_pass filter if xoffset=0
|
||||
je filter_block2d_bil_var_sse2_sp_only
|
||||
|
||||
shl rax, 5 ; point to filter coeff with xoffset
|
||||
lea rax, [rax + rcx] ; HFilter
|
||||
|
||||
movsxd rdx, dword ptr arg(6) ; yoffset
|
||||
|
||||
cmp rdx, 0 ; skip second_pass filter if yoffset=0
|
||||
je filter_block2d_bil_var_sse2_fp_only
|
||||
|
||||
shl rdx, 5
|
||||
lea rdx, [rdx + rcx] ; VFilter
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
movq xmm3, QWORD PTR [rsi+1] ;
|
||||
|
||||
movq xmm3, QWORD PTR [rsi+1] ;
|
||||
punpcklbw xmm1, xmm0 ;
|
||||
|
||||
pmullw xmm1, [rax] ;
|
||||
pmullw xmm1, [rax] ;
|
||||
punpcklbw xmm3, xmm0
|
||||
;
|
||||
pmullw xmm3, [rax+16] ;
|
||||
|
||||
paddw xmm1, xmm3 ;
|
||||
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
paddw xmm1, xmm4 ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
movdqa xmm5, xmm1
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rsi, r8
|
||||
%endif
|
||||
filter_block2d_bil_var_sse2_loop:
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line
|
||||
lea rsi, [rsi + rbx]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
|
||||
%endif
|
||||
|
||||
filter_block2d_bil_var_sse2_loop:
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
movq xmm3, QWORD PTR [rsi+1] ;
|
||||
|
||||
punpcklbw xmm1, xmm0 ;
|
||||
pmullw xmm1, [rax] ;
|
||||
|
||||
punpcklbw xmm3, xmm0 ;
|
||||
pmullw xmm3, [rax+16] ;
|
||||
|
||||
paddw xmm1, xmm3 ;
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
|
||||
|
||||
paddw xmm1, xmm4 ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
movdqa xmm3, xmm5 ;
|
||||
|
||||
movdqa xmm5, xmm1 ;
|
||||
pmullw xmm3, [rdx] ;
|
||||
|
||||
pmullw xmm3, [rdx] ;
|
||||
pmullw xmm1, [rdx+16] ;
|
||||
paddw xmm1, xmm3 ;
|
||||
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)] ;
|
||||
paddw xmm1, xmm4 ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ;
|
||||
@@ -577,20 +589,103 @@ filter_block2d_bil_var_sse2_loop:
|
||||
pmaddwd xmm1, xmm1 ;
|
||||
paddd xmm7, xmm1 ;
|
||||
|
||||
lea rsi, [rsi + rbx] ;ref_pixels_per_line
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line
|
||||
%else
|
||||
movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
|
||||
add rsi, r8
|
||||
add rdi, r9
|
||||
lea rdi, [rdi + r9]
|
||||
%endif
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz filter_block2d_bil_var_sse2_loop ;
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_sse2_sp_only:
|
||||
movsxd rdx, dword ptr arg(6) ; yoffset
|
||||
shl rdx, 5
|
||||
lea rdx, [rdx + rcx] ; VFilter
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
punpcklbw xmm1, xmm0 ;
|
||||
|
||||
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
|
||||
lea rsi, [rsi + rax]
|
||||
|
||||
filter_block2d_bil_sp_only_loop:
|
||||
movq xmm3, QWORD PTR [rsi] ;
|
||||
punpcklbw xmm3, xmm0 ;
|
||||
movdqa xmm5, xmm3
|
||||
|
||||
pmullw xmm1, [rdx] ;
|
||||
pmullw xmm3, [rdx+16] ;
|
||||
paddw xmm1, xmm3 ;
|
||||
paddw xmm1, xmm4 ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ;
|
||||
punpcklbw xmm3, xmm0 ;
|
||||
|
||||
psubw xmm1, xmm3 ;
|
||||
paddw xmm6, xmm1 ;
|
||||
|
||||
pmaddwd xmm1, xmm1 ;
|
||||
paddd xmm7, xmm1 ;
|
||||
|
||||
movdqa xmm1, xmm5 ;
|
||||
lea rsi, [rsi + rax] ;ref_pixels_per_line
|
||||
lea rdi, [rdi + rbx] ;src_pixels_per_line
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz filter_block2d_bil_sp_only_loop ;
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_sse2_fp_only:
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
filter_block2d_bil_fp_only_loop:
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
movq xmm3, QWORD PTR [rsi+1] ;
|
||||
|
||||
punpcklbw xmm1, xmm0 ;
|
||||
pmullw xmm1, [rax] ;
|
||||
punpcklbw xmm3, xmm0 ;
|
||||
pmullw xmm3, [rax+16] ;
|
||||
|
||||
paddw xmm1, xmm3 ;
|
||||
paddw xmm1, xmm4 ;
|
||||
psraw xmm1, xmm_filter_shift ;
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ;
|
||||
punpcklbw xmm3, xmm0 ;
|
||||
|
||||
psubw xmm1, xmm3 ;
|
||||
paddw xmm6, xmm1 ;
|
||||
|
||||
pmaddwd xmm1, xmm1 ;
|
||||
paddd xmm7, xmm1 ;
|
||||
lea rsi, [rsi + rdx]
|
||||
lea rdi, [rdi + rbx] ;src_pixels_per_line
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz filter_block2d_bil_fp_only_loop ;
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_variance:
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
|
||||
@@ -627,12 +722,12 @@ filter_block2d_bil_var_sse2_loop:
|
||||
movd [rsi], mm2 ; xsum
|
||||
movd [rdi], mm4 ; xxsum
|
||||
|
||||
|
||||
; begin epilog
|
||||
add rsp, 16
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
@@ -974,3 +1069,13 @@ SECTION_RODATA
|
||||
align 16
|
||||
xmm_bi_rd:
|
||||
times 8 dw 64
|
||||
align 16
|
||||
vp8_bilinear_filters_sse2:
|
||||
dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
|
||||
dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
|
||||
dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
|
||||
dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
|
||||
dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
|
||||
dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
|
||||
dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
|
||||
|
@@ -76,8 +76,8 @@ void vp8_filter_block2d_bil_var_sse2
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
@@ -222,21 +222,6 @@ unsigned int vp8_variance8x16_wmt
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// the mmx function that does the bilinear filtering and var calculation //
|
||||
// int one pass //
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) =
|
||||
{
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
|
||||
};
|
||||
unsigned int vp8_sub_pixel_variance4x4_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -272,15 +257,38 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
|
||||
int xsum;
|
||||
unsigned int xxsum;
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
|
||||
&xsum, &xxsum
|
||||
);
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
|
||||
*sse = xxsum;
|
||||
return (xxsum - ((xsum * xsum) >> 6));
|
||||
@@ -344,7 +352,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
|
||||
xoffset, yoffset,
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
@@ -352,7 +360,7 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
|
||||
xoffset, yoffset,
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
}
|
||||
@@ -392,21 +400,56 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
@@ -428,12 +471,36 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
||||
{
|
||||
int xsum;
|
||||
unsigned int xxsum;
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset],
|
||||
&xsum, &xxsum
|
||||
);
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
xoffset, yoffset,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
|
||||
*sse = xxsum;
|
||||
return (xxsum - ((xsum * xsum) >> 7));
|
||||
|
@@ -108,37 +108,26 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
|
||||
|
||||
int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
|
||||
short *qcoeff_ptr,short *dequant_ptr,
|
||||
const int *default_zig_zag, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr,
|
||||
unsigned short zbin_oq_value,
|
||||
short *zbin_boost_ptr);
|
||||
short *qcoeff_ptr,short *dequant_ptr,
|
||||
const int *default_zig_zag, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr,
|
||||
unsigned short zbin_oq_value,
|
||||
short *zbin_boost_ptr,
|
||||
short *quant_shift_ptr);
|
||||
|
||||
void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
|
||||
{
|
||||
short *zbin_boost_ptr = b->zrun_zbin_boost;
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *zbin_ptr = b->zbin;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
short zbin_oq_value = b->zbin_extra;
|
||||
|
||||
d->eob = vp8_regular_quantize_b_impl_sse2(
|
||||
coeff_ptr,
|
||||
zbin_ptr,
|
||||
qcoeff_ptr,
|
||||
dequant_ptr,
|
||||
vp8_default_zig_zag1d,
|
||||
|
||||
round_ptr,
|
||||
quant_ptr,
|
||||
dqcoeff_ptr,
|
||||
zbin_oq_value,
|
||||
zbin_boost_ptr
|
||||
);
|
||||
d->eob = vp8_regular_quantize_b_impl_sse2(b->coeff,
|
||||
b->zbin,
|
||||
d->qcoeff,
|
||||
d->dequant,
|
||||
vp8_default_zig_zag1d,
|
||||
b->round,
|
||||
b->quant,
|
||||
d->dqcoeff,
|
||||
b->zbin_extra,
|
||||
b->zrun_zbin_boost,
|
||||
b->quant_shift);
|
||||
}
|
||||
|
||||
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
|
||||
@@ -307,7 +296,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2;
|
||||
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
|
||||
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
|
||||
#if ARCH_X86
|
||||
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;
|
||||
#endif
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
|
||||
|
||||
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
|
||||
|
@@ -35,7 +35,7 @@ VP8_COMMON_SRCS-yes += common/entropy.c
|
||||
VP8_COMMON_SRCS-yes += common/entropymode.c
|
||||
VP8_COMMON_SRCS-yes += common/entropymv.c
|
||||
VP8_COMMON_SRCS-yes += common/extend.c
|
||||
VP8_COMMON_SRCS-yes += common/filter_c.c
|
||||
VP8_COMMON_SRCS-yes += common/filter.c
|
||||
VP8_COMMON_SRCS-yes += common/findnearmv.c
|
||||
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
|
||||
VP8_COMMON_SRCS-yes += common/idctllm.c
|
||||
@@ -111,14 +111,15 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
|
||||
endif
|
||||
|
||||
VP8_COMMON_SRCS-$(ARCH_ARM) += common/asm_com_offsets.c
|
||||
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
|
||||
|
||||
# common (c)
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.h
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c
|
||||
|
||||
# common (armv6)
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)
|
||||
@@ -161,16 +162,3 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon16x16mb_neon$(ASM)
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM)
|
||||
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c
|
||||
|
||||
|
||||
#
|
||||
# Rule to extract assembly constants from C sources
|
||||
#
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
vpx_asm_offsets.asm: obj_int_extract
|
||||
vpx_asm_offsets.asm: $(VP8_PREFIX)common/arm/vpx_asm_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)common/arm/vpx_asm_offsets.c.o
|
||||
CLEAN-OBJS += vpx_asm_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): vpx_asm_offsets.asm
|
||||
endif
|
||||
|
@@ -142,8 +142,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
|
||||
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
|
||||
RANGE_CHECK_HI(cfg, g_profile, 3);
|
||||
RANGE_CHECK_HI(cfg, rc_min_quantizer, 63);
|
||||
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
|
||||
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
|
||||
RANGE_CHECK_HI(cfg, g_threads, 64);
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
|
||||
@@ -912,8 +912,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
|
||||
ctx->preview_img.x_chroma_shift = 1;
|
||||
ctx->preview_img.y_chroma_shift = 1;
|
||||
|
||||
ctx->preview_img.d_w = ctx->cfg.g_w;
|
||||
ctx->preview_img.d_h = ctx->cfg.g_h;
|
||||
ctx->preview_img.d_w = sd.y_width;
|
||||
ctx->preview_img.d_h = sd.y_height;
|
||||
ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride;
|
||||
ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride;
|
||||
ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride;
|
||||
|
@@ -42,7 +42,7 @@ VP8_CX_SRCS-yes += encoder/encodeframe.c
|
||||
VP8_CX_SRCS-yes += encoder/encodeintra.c
|
||||
VP8_CX_SRCS-yes += encoder/encodemb.c
|
||||
VP8_CX_SRCS-yes += encoder/encodemv.c
|
||||
VP8_CX_SRCS-yes += encoder/ethreading.c
|
||||
VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c
|
||||
VP8_CX_SRCS-yes += encoder/firstpass.c
|
||||
VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c
|
||||
VP8_CX_SRCS-yes += encoder/block.h
|
||||
|
@@ -14,10 +14,13 @@
|
||||
#File list for arm
|
||||
# encoder
|
||||
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
|
||||
VP8_CX_SRCS-$(ARCH_ARM) += encoder/asm_enc_offsets.c
|
||||
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.h
|
||||
VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
|
||||
|
||||
VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c
|
||||
@@ -31,6 +34,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
|
||||
|
||||
#File list for armv6
|
||||
# encoder
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM)
|
||||
|
||||
#File list for neon
|
||||
@@ -49,17 +53,3 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(A
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
|
||||
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c
|
||||
|
||||
#
|
||||
# Rule to extract assembly constants from C sources
|
||||
#
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
vpx_vp8_enc_asm_offsets.asm: obj_int_extract
|
||||
vpx_vp8_enc_asm_offsets.asm: $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o
|
||||
CLEAN-OBJS += vpx_vp8_enc_asm_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): vpx_vp8_enc_asm_offsets.asm
|
||||
endif
|
||||
|
@@ -65,7 +65,7 @@ VP8_DX_SRCS-yes += decoder/detokenize.h
|
||||
VP8_DX_SRCS-yes += decoder/onyxd_int.h
|
||||
VP8_DX_SRCS-yes += decoder/treereader.h
|
||||
VP8_DX_SRCS-yes += decoder/onyxd_if.c
|
||||
VP8_DX_SRCS-yes += decoder/threading.c
|
||||
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
|
||||
VP8_DX_SRCS-yes += decoder/idct_blk.c
|
||||
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h
|
||||
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c
|
||||
|
@@ -12,9 +12,9 @@
|
||||
#VP8_DX_SRCS list is modified according to different platforms.
|
||||
|
||||
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
|
||||
VP8_CX_SRCS-$(ARCH_ARM) += decoder/asm_dec_offsets.c
|
||||
|
||||
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
|
||||
VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
|
||||
|
||||
#File list for armv6
|
||||
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
|
||||
|
@@ -321,7 +321,7 @@ typedef struct vpx_codec_priv_cb_pair
|
||||
{
|
||||
vpx_codec_put_frame_cb_fn_t put_frame;
|
||||
vpx_codec_put_slice_cb_fn_t put_slice;
|
||||
};
|
||||
} u;
|
||||
void *user_priv;
|
||||
} vpx_codec_priv_cb_pair_t;
|
||||
|
||||
|
@@ -160,7 +160,7 @@ vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx
|
||||
res = VPX_CODEC_ERROR;
|
||||
else
|
||||
{
|
||||
ctx->priv->dec.put_frame_cb.put_frame = cb;
|
||||
ctx->priv->dec.put_frame_cb.u.put_frame = cb;
|
||||
ctx->priv->dec.put_frame_cb.user_priv = user_priv;
|
||||
res = VPX_CODEC_OK;
|
||||
}
|
||||
@@ -182,7 +182,7 @@ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx
|
||||
res = VPX_CODEC_ERROR;
|
||||
else
|
||||
{
|
||||
ctx->priv->dec.put_slice_cb.put_slice = cb;
|
||||
ctx->priv->dec.put_slice_cb.u.put_slice = cb;
|
||||
ctx->priv->dec.put_slice_cb.user_priv = user_priv;
|
||||
res = VPX_CODEC_OK;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user