Compare commits
70 Commits
v0.9.6
...
sandbox/at
Author | SHA1 | Date | |
---|---|---|---|
![]() |
744a58bc1c | ||
![]() |
86b5556f5a | ||
![]() |
4375b4ac39 | ||
![]() |
71595edd47 | ||
![]() |
848dddee15 | ||
![]() |
f1ba70e199 | ||
![]() |
a22df2e29d | ||
![]() |
185557344a | ||
![]() |
de5182eef3 | ||
![]() |
8431e768c9 | ||
![]() |
de50520a8c | ||
![]() |
346b3e7ce9 | ||
![]() |
71bcd9f1af | ||
![]() |
6795e256c1 | ||
![]() |
8c48c943e7 | ||
![]() |
aa4a90c880 | ||
![]() |
2ec0cfbe99 | ||
![]() |
d0ec28b3d3 | ||
![]() |
e54dcfe88d | ||
![]() |
52f6e28e9e | ||
![]() |
3788b3564c | ||
![]() |
27972d2c1d | ||
![]() |
5c60a646f3 | ||
![]() |
75051c8b59 | ||
![]() |
5db0eeea21 | ||
![]() |
6e73748492 | ||
![]() |
170b87390e | ||
![]() |
2ae91fbef0 | ||
![]() |
e34e417d94 | ||
![]() |
3c9dd6c3ef | ||
![]() |
c5c5dcd0be | ||
![]() |
29c46b64a2 | ||
![]() |
3dc382294b | ||
![]() |
3f6f7289aa | ||
![]() |
b2aa401776 | ||
![]() |
76ec21928c | ||
![]() |
9c836daf65 | ||
![]() |
3ae2465788 | ||
![]() |
7ab08e1fee | ||
![]() |
128d2c23b3 | ||
![]() |
6daacdb785 | ||
![]() |
ed40ff9e2d | ||
![]() |
f3e9e2a0f8 | ||
![]() |
a0306ea660 | ||
![]() |
c5a049babd | ||
![]() |
5c24071504 | ||
![]() |
43baf7ff21 | ||
![]() |
7b8e7f0f3a | ||
![]() |
4561109a69 | ||
![]() |
7966dd5287 | ||
![]() |
fa836faede | ||
![]() |
56efffdcd1 | ||
![]() |
fb037ec05b | ||
![]() |
419f638910 | ||
![]() |
95adf3df77 | ||
![]() |
859abd6b5d | ||
![]() |
8432a1729f | ||
![]() |
e8f7b0f7f5 | ||
![]() |
244e2e1451 | ||
![]() |
5091e01ea1 | ||
![]() |
ddd260eb62 | ||
![]() |
e6948bf0f9 | ||
![]() |
de87c420ef | ||
![]() |
0eccee4378 | ||
![]() |
5d1d9911cb | ||
![]() |
1016b856d1 | ||
![]() |
fe9a604b1e | ||
![]() |
bc9c30a003 | ||
![]() |
9fc8cb39aa | ||
![]() |
84f7f20985 |
@@ -152,8 +152,8 @@ endif
|
||||
# Rule to extract assembly constants from C sources
|
||||
#
|
||||
obj_int_extract: build/make/obj_int_extract.c
|
||||
$(if $(quiet),echo " [HOSTCC] $@")
|
||||
$(qexec)$(HOSTCC) -I. -o $@ $<
|
||||
$(if $(quiet),@echo " [HOSTCC] $@")
|
||||
$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
|
||||
CLEAN-OBJS += obj_int_extract
|
||||
|
||||
#
|
||||
@@ -255,7 +255,7 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
|
||||
endif
|
||||
|
||||
#
|
||||
# Configuration dependant rules
|
||||
# Configuration dependent rules
|
||||
#
|
||||
$(call pairmap,install_map_templates,$(INSTALL_MAPS))
|
||||
|
||||
@@ -332,7 +332,7 @@ ifneq ($(call enabled,DIST-SRCS),)
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/yasm.rules
|
||||
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
|
||||
#
|
||||
# This isn't really ARCH_ARM dependent, it's dependant on whether we're
|
||||
# This isn't really ARCH_ARM dependent, it's dependent on whether we're
|
||||
# using assembly code or not (CONFIG_OPTIMIZATIONS maybe). Just use
|
||||
# this for now.
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/obj_int_extract.c
|
||||
|
@@ -83,7 +83,7 @@ Build options:
|
||||
${toggle_werror} treat warnings as errors, if possible
|
||||
(not available with all compilers)
|
||||
${toggle_optimizations} turn on/off compiler optimization flags
|
||||
${toggle_pic} turn on/off Position Independant Code
|
||||
${toggle_pic} turn on/off Position Independent Code
|
||||
${toggle_ccache} turn on/off compiler cache
|
||||
${toggle_debug} enable/disable debug mode
|
||||
${toggle_gprof} enable/disable gprof profiling instrumentation
|
||||
@@ -957,7 +957,7 @@ process_common_toolchain() {
|
||||
enabled small && check_add_cflags -O2 || check_add_cflags -O3
|
||||
fi
|
||||
|
||||
# Position Independant Code (PIC) support, for building relocatable
|
||||
# Position Independent Code (PIC) support, for building relocatable
|
||||
# shared objects
|
||||
enabled gcc && enabled pic && check_add_cflags -fPIC
|
||||
|
||||
|
@@ -33,6 +33,7 @@ Options:
|
||||
--proj-guid=GUID GUID to use for the project
|
||||
--module-def=filename File containing export definitions (for DLLs)
|
||||
--ver=version Version (7,8,9) of visual studio to generate for
|
||||
--src-path-bare=dir Path to root of source tree
|
||||
-Ipath/to/include Additional include directories
|
||||
-DFLAG[=value] Preprocessor macros to define
|
||||
-Lpath/to/lib Additional library search paths
|
||||
@@ -191,6 +192,8 @@ for opt in "$@"; do
|
||||
;;
|
||||
--lib) proj_kind="lib"
|
||||
;;
|
||||
--src-path-bare=*) src_path_bare="$optval"
|
||||
;;
|
||||
--static-crt) use_static_runtime=true
|
||||
;;
|
||||
--ver=*)
|
||||
@@ -335,6 +338,35 @@ generate_vcproj() {
|
||||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
WarningLevel="3" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
DebugInformationFormat="1" \
|
||||
;;
|
||||
vpx)
|
||||
tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat $src_path_bare" \
|
||||
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1"
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
@@ -358,6 +390,12 @@ generate_vcproj() {
|
||||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
OutputFile="${name}.exe" \
|
||||
GenerateDebugInformation="true" \
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
@@ -406,6 +444,34 @@ generate_vcproj() {
|
||||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
DebugInformationFormat="0" \
|
||||
;;
|
||||
vpx)
|
||||
tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat $src_path_bare" \
|
||||
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
Detect64BitPortabilityProblems="true" \
|
||||
|
||||
$uses_asm && tag Tool Name="YASM" IncludePaths="$incs"
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
@@ -428,6 +494,12 @@ generate_vcproj() {
|
||||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
OutputFile="${name}.exe" \
|
||||
GenerateDebugInformation="true" \
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
|
@@ -14,7 +14,7 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <io.h>
|
||||
#include <share.h>
|
||||
#include "vpx/vpx_integer.h"
|
||||
@@ -59,20 +59,47 @@ int parse_macho(uint8_t *base_buf, size_t sz)
|
||||
struct mach_header header;
|
||||
uint8_t *buf = base_buf;
|
||||
int base_data_section = 0;
|
||||
int bits = 0;
|
||||
|
||||
/* We can read in mach_header for 32 and 64 bit architectures
|
||||
* because it's identical to mach_header_64 except for the last
|
||||
* element (uint32_t reserved), which we don't use. Then, when
|
||||
* we know which architecture we're looking at, increment buf
|
||||
* appropriately.
|
||||
*/
|
||||
memcpy(&header, buf, sizeof(struct mach_header));
|
||||
buf += sizeof(struct mach_header);
|
||||
|
||||
if (header.magic != MH_MAGIC)
|
||||
if (header.magic == MH_MAGIC)
|
||||
{
|
||||
log_msg("Bad magic number for object file. 0x%x expected, 0x%x found.\n",
|
||||
header.magic, MH_MAGIC);
|
||||
goto bail;
|
||||
if (header.cputype == CPU_TYPE_ARM
|
||||
|| header.cputype == CPU_TYPE_X86)
|
||||
{
|
||||
bits = 32;
|
||||
buf += sizeof(struct mach_header);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
if (header.cputype != CPU_TYPE_ARM)
|
||||
else if (header.magic == MH_MAGIC_64)
|
||||
{
|
||||
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_ARM.\n");
|
||||
if (header.cputype == CPU_TYPE_X86_64)
|
||||
{
|
||||
bits = 64;
|
||||
buf += sizeof(struct mach_header_64);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
|
||||
MH_MAGIC, MH_MAGIC_64, header.magic);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
@@ -85,8 +112,6 @@ int parse_macho(uint8_t *base_buf, size_t sz)
|
||||
for (i = 0; i < header.ncmds; i++)
|
||||
{
|
||||
struct load_command lc;
|
||||
struct symtab_command sc;
|
||||
struct segment_command seg_c;
|
||||
|
||||
memcpy(&lc, buf, sizeof(struct load_command));
|
||||
|
||||
@@ -94,50 +119,99 @@ int parse_macho(uint8_t *base_buf, size_t sz)
|
||||
{
|
||||
uint8_t *seg_buf = buf;
|
||||
struct section s;
|
||||
struct segment_command seg_c;
|
||||
|
||||
memcpy(&seg_c, buf, sizeof(struct segment_command));
|
||||
|
||||
memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
|
||||
seg_buf += sizeof(struct segment_command);
|
||||
|
||||
for (j = 0; j < seg_c.nsects; j++)
|
||||
/* Although each section is given its own offset, nlist.n_value
|
||||
* references the offset of the first section. This isn't
|
||||
* apparent without debug information because the offset of the
|
||||
* data section is the same as the first section. However, with
|
||||
* debug sections mixed in, the offset of the debug section
|
||||
* increases but n_value still references the first section.
|
||||
*/
|
||||
if (seg_c.nsects < 1)
|
||||
{
|
||||
memcpy(&s, seg_buf + (j * sizeof(struct section)), sizeof(struct section));
|
||||
|
||||
// Need to get this offset which is the start of the symbol table
|
||||
// before matching the strings up with symbols.
|
||||
base_data_section = s.offset;
|
||||
log_msg("Not enough sections\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&s, seg_buf, sizeof(struct section));
|
||||
base_data_section = s.offset;
|
||||
}
|
||||
else if (lc.cmd == LC_SEGMENT_64)
|
||||
{
|
||||
uint8_t *seg_buf = buf;
|
||||
struct section_64 s;
|
||||
struct segment_command_64 seg_c;
|
||||
|
||||
memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
|
||||
seg_buf += sizeof(struct segment_command_64);
|
||||
|
||||
/* Explanation in LC_SEGMENT */
|
||||
if (seg_c.nsects < 1)
|
||||
{
|
||||
log_msg("Not enough sections\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&s, seg_buf, sizeof(struct section_64));
|
||||
base_data_section = s.offset;
|
||||
}
|
||||
else if (lc.cmd == LC_SYMTAB)
|
||||
{
|
||||
uint8_t *sym_buf = base_buf;
|
||||
uint8_t *str_buf = base_buf;
|
||||
|
||||
if (base_data_section != 0)
|
||||
{
|
||||
struct symtab_command sc;
|
||||
uint8_t *sym_buf = base_buf;
|
||||
uint8_t *str_buf = base_buf;
|
||||
|
||||
memcpy(&sc, buf, sizeof(struct symtab_command));
|
||||
|
||||
if (sc.cmdsize != sizeof(struct symtab_command))
|
||||
{
|
||||
log_msg("Can't find symbol table!\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
sym_buf += sc.symoff;
|
||||
str_buf += sc.stroff;
|
||||
|
||||
for (j = 0; j < sc.nsyms; j++)
|
||||
{
|
||||
struct nlist nl;
|
||||
int val;
|
||||
/* Location of string is calculated each time from the
|
||||
* start of the string buffer. On darwin the symbols
|
||||
* are prefixed by "_", so we bump the pointer by 1.
|
||||
* The target value is defined as an int in asm_*_offsets.c,
|
||||
* which is 4 bytes on all targets we currently use.
|
||||
*/
|
||||
if (bits == 32)
|
||||
{
|
||||
struct nlist nl;
|
||||
int val;
|
||||
|
||||
memcpy(&nl, sym_buf + (j * sizeof(struct nlist)), sizeof(struct nlist));
|
||||
memcpy(&nl, sym_buf, sizeof(struct nlist));
|
||||
sym_buf += sizeof(struct nlist);
|
||||
|
||||
val = *((int *)(base_buf + base_data_section + nl.n_value));
|
||||
memcpy(&val, base_buf + base_data_section + nl.n_value,
|
||||
sizeof(val));
|
||||
printf("%-40s EQU %5d\n",
|
||||
str_buf + nl.n_un.n_strx + 1, val);
|
||||
}
|
||||
else /* if (bits == 64) */
|
||||
{
|
||||
struct nlist_64 nl;
|
||||
int val;
|
||||
|
||||
// Location of string is cacluated each time from the
|
||||
// start of the string buffer. On darwin the symbols
|
||||
// are prefixed by "_". On other platforms it is not
|
||||
// so it needs to be removed. That is the reason for
|
||||
// the +1.
|
||||
printf("%-40s EQU %5d\n", str_buf + nl.n_un.n_strx + 1, val);
|
||||
memcpy(&nl, sym_buf, sizeof(struct nlist_64));
|
||||
sym_buf += sizeof(struct nlist_64);
|
||||
|
||||
memcpy(&val, base_buf + base_data_section + nl.n_value,
|
||||
sizeof(val));
|
||||
printf("%-40s EQU %5d\n",
|
||||
str_buf + nl.n_un.n_strx + 1, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -218,7 +292,7 @@ bail:
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
#else
|
||||
#elif defined(__ELF__)
|
||||
#include "elf.h"
|
||||
|
||||
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
|
||||
@@ -237,212 +311,420 @@ bail:
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t *buf; /* Buffer containing ELF data */
|
||||
size_t sz; /* Buffer size */
|
||||
int le_data; /* Data is little-endian */
|
||||
Elf32_Ehdr hdr;
|
||||
uint8_t *buf; /* Buffer containing ELF data */
|
||||
size_t sz; /* Buffer size */
|
||||
int le_data; /* Data is little-endian */
|
||||
unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
|
||||
int bits; /* 32 or 64 */
|
||||
Elf32_Ehdr hdr32;
|
||||
Elf64_Ehdr hdr64;
|
||||
} elf_obj_t;
|
||||
|
||||
int parse_elf32_header(elf_obj_t *elf)
|
||||
int parse_elf_header(elf_obj_t *elf)
|
||||
{
|
||||
int res;
|
||||
/* Verify ELF32 header */
|
||||
COPY_STRUCT(&elf->hdr, elf->buf, 0, elf->sz);
|
||||
res = elf->hdr.e_ident[EI_MAG0] == ELFMAG0;
|
||||
res &= elf->hdr.e_ident[EI_MAG1] == ELFMAG1;
|
||||
res &= elf->hdr.e_ident[EI_MAG2] == ELFMAG2;
|
||||
res &= elf->hdr.e_ident[EI_MAG3] == ELFMAG3;
|
||||
res &= elf->hdr.e_ident[EI_CLASS] == ELFCLASS32;
|
||||
res &= elf->hdr.e_ident[EI_DATA] == ELFDATA2LSB
|
||||
|| elf->hdr.e_ident[EI_DATA] == ELFDATA2MSB;
|
||||
/* Verify ELF Magic numbers */
|
||||
COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
|
||||
res = elf->e_ident[EI_MAG0] == ELFMAG0;
|
||||
res &= elf->e_ident[EI_MAG1] == ELFMAG1;
|
||||
res &= elf->e_ident[EI_MAG2] == ELFMAG2;
|
||||
res &= elf->e_ident[EI_MAG3] == ELFMAG3;
|
||||
res &= elf->e_ident[EI_CLASS] == ELFCLASS32
|
||||
|| elf->e_ident[EI_CLASS] == ELFCLASS64;
|
||||
res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
|
||||
|
||||
if (!res) goto bail;
|
||||
|
||||
elf->le_data = elf->hdr.e_ident[EI_DATA] == ELFDATA2LSB;
|
||||
elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
|
||||
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_type);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_machine);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_version);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_entry);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phoff);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shoff);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_flags);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_ehsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phentsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phnum);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shentsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shnum);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shstrndx);
|
||||
return 0;
|
||||
bail:
|
||||
return 1;
|
||||
}
|
||||
|
||||
int parse_elf32_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr)
|
||||
{
|
||||
if (idx >= elf->hdr.e_shnum)
|
||||
goto bail;
|
||||
|
||||
COPY_STRUCT(hdr, elf->buf, elf->hdr.e_shoff + idx * elf->hdr.e_shentsize,
|
||||
elf->sz);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_name);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_type);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_flags);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_addr);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_offset);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_size);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_link);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_info);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_addralign);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr->sh_entsize);
|
||||
return 0;
|
||||
bail:
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *parse_elf32_string_table(elf_obj_t *elf, int s_idx, int idx)
|
||||
{
|
||||
Elf32_Shdr shdr;
|
||||
|
||||
if (parse_elf32_section(elf, s_idx, &shdr))
|
||||
/* Read in relevant values */
|
||||
if (elf->e_ident[EI_CLASS] == ELFCLASS32)
|
||||
{
|
||||
log_msg("Failed to parse ELF string table: section %d, index %d\n",
|
||||
s_idx, idx);
|
||||
return "";
|
||||
elf->bits = 32;
|
||||
COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
|
||||
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
|
||||
}
|
||||
else /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
|
||||
{
|
||||
elf->bits = 64;
|
||||
COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
|
||||
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
|
||||
ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
|
||||
}
|
||||
|
||||
return (char *)(elf->buf + shdr.sh_offset + idx);
|
||||
return 0;
|
||||
bail:
|
||||
log_msg("Failed to parse ELF file header");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int parse_elf32_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym)
|
||||
int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64)
|
||||
{
|
||||
COPY_STRUCT(sym, elf->buf, ofst, elf->sz);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym->st_name);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym->st_value);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym->st_size);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym->st_info);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym->st_other);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym->st_shndx);
|
||||
if (hdr32)
|
||||
{
|
||||
if (idx >= elf->hdr32.e_shnum)
|
||||
goto bail;
|
||||
|
||||
COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
|
||||
elf->sz);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
|
||||
}
|
||||
else /* if (hdr64) */
|
||||
{
|
||||
if (idx >= elf->hdr64.e_shnum)
|
||||
goto bail;
|
||||
|
||||
COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
|
||||
elf->sz);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
|
||||
ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
|
||||
}
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
return 1;
|
||||
}
|
||||
|
||||
int parse_elf32(uint8_t *buf, size_t sz, output_fmt_t mode)
|
||||
char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx)
|
||||
{
|
||||
elf_obj_t elf;
|
||||
Elf32_Shdr shdr;
|
||||
if (elf->bits == 32)
|
||||
{
|
||||
Elf32_Shdr shdr;
|
||||
|
||||
if (parse_elf_section(elf, s_idx, &shdr, NULL))
|
||||
{
|
||||
log_msg("Failed to parse ELF string table: section %d, index %d\n",
|
||||
s_idx, idx);
|
||||
return "";
|
||||
}
|
||||
|
||||
return (char *)(elf->buf + shdr.sh_offset + idx);
|
||||
}
|
||||
else /* if (elf->bits == 64) */
|
||||
{
|
||||
Elf64_Shdr shdr;
|
||||
|
||||
if (parse_elf_section(elf, s_idx, NULL, &shdr))
|
||||
{
|
||||
log_msg("Failed to parse ELF string table: section %d, index %d\n",
|
||||
s_idx, idx);
|
||||
return "";
|
||||
}
|
||||
|
||||
return (char *)(elf->buf + shdr.sh_offset + idx);
|
||||
}
|
||||
}
|
||||
|
||||
int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64)
|
||||
{
|
||||
if (sym32)
|
||||
{
|
||||
COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
|
||||
}
|
||||
else /* if (sym64) */
|
||||
{
|
||||
COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
|
||||
ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
|
||||
}
|
||||
return 0;
|
||||
bail:
|
||||
return 1;
|
||||
}
|
||||
|
||||
int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode)
|
||||
{
|
||||
elf_obj_t elf;
|
||||
unsigned int ofst;
|
||||
int i;
|
||||
Elf32_Off strtab_off; /* save String Table offset for later use */
|
||||
int i;
|
||||
Elf32_Off strtab_off32;
|
||||
Elf64_Off strtab_off64; /* save String Table offset for later use */
|
||||
|
||||
memset(&elf, 0, sizeof(elf));
|
||||
elf.buf = buf;
|
||||
elf.sz = sz;
|
||||
|
||||
/* Parse Header */
|
||||
if (parse_elf32_header(&elf))
|
||||
{
|
||||
log_msg("Parse error: File does not appear to be valid ELF32\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_elf_header(&elf))
|
||||
goto bail;
|
||||
|
||||
for (i = 0; i < elf.hdr.e_shnum; i++)
|
||||
if (elf.bits == 32)
|
||||
{
|
||||
parse_elf32_section(&elf, i, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_STRTAB)
|
||||
Elf32_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr32.e_shnum; i++)
|
||||
{
|
||||
char strtsb_name[128];
|
||||
parse_elf_section(&elf, i, &shdr, NULL);
|
||||
|
||||
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
|
||||
|
||||
if (!(strcmp(strtsb_name, ".shstrtab")))
|
||||
if (shdr.sh_type == SHT_STRTAB)
|
||||
{
|
||||
log_msg("found section: %s\n", strtsb_name);
|
||||
strtab_off = shdr.sh_offset;
|
||||
break;
|
||||
char strtsb_name[128];
|
||||
|
||||
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
|
||||
|
||||
if (!(strcmp(strtsb_name, ".shstrtab")))
|
||||
{
|
||||
/* log_msg("found section: %s\n", strtsb_name); */
|
||||
strtab_off32 = shdr.sh_offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* if (elf.bits == 64) */
|
||||
{
|
||||
Elf64_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr64.e_shnum; i++)
|
||||
{
|
||||
parse_elf_section(&elf, i, NULL, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_STRTAB)
|
||||
{
|
||||
char strtsb_name[128];
|
||||
|
||||
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
|
||||
|
||||
if (!(strcmp(strtsb_name, ".shstrtab")))
|
||||
{
|
||||
/* log_msg("found section: %s\n", strtsb_name); */
|
||||
strtab_off64 = shdr.sh_offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse all Symbol Tables */
|
||||
for (i = 0; i < elf.hdr.e_shnum; i++)
|
||||
if (elf.bits == 32)
|
||||
{
|
||||
|
||||
parse_elf32_section(&elf, i, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_SYMTAB)
|
||||
Elf32_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr32.e_shnum; i++)
|
||||
{
|
||||
for (ofst = shdr.sh_offset;
|
||||
ofst < shdr.sh_offset + shdr.sh_size;
|
||||
ofst += shdr.sh_entsize)
|
||||
parse_elf_section(&elf, i, &shdr, NULL);
|
||||
|
||||
if (shdr.sh_type == SHT_SYMTAB)
|
||||
{
|
||||
Elf32_Sym sym;
|
||||
|
||||
parse_elf32_symbol(&elf, ofst, &sym);
|
||||
|
||||
/* For all OBJECTS (data objects), extract the value from the
|
||||
* proper data segment.
|
||||
*/
|
||||
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
|
||||
log_msg("found data object %s\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name));
|
||||
|
||||
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
|
||||
&& sym.st_size == 4)
|
||||
for (ofst = shdr.sh_offset;
|
||||
ofst < shdr.sh_offset + shdr.sh_size;
|
||||
ofst += shdr.sh_entsize)
|
||||
{
|
||||
Elf32_Shdr dhdr;
|
||||
int32_t val;
|
||||
char section_name[128];
|
||||
Elf32_Sym sym;
|
||||
|
||||
parse_elf32_section(&elf, sym.st_shndx, &dhdr);
|
||||
parse_elf_symbol(&elf, ofst, &sym, NULL);
|
||||
|
||||
/* For explanition - refer to _MSC_VER version of code */
|
||||
strcpy(section_name, (char *)(elf.buf + strtab_off + dhdr.sh_name));
|
||||
log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type);
|
||||
/* For all OBJECTS (data objects), extract the value from the
|
||||
* proper data segment.
|
||||
*/
|
||||
/* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
|
||||
log_msg("found data object %s\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name));
|
||||
*/
|
||||
|
||||
if (!(strcmp(section_name, ".bss")))
|
||||
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
|
||||
&& sym.st_size == 4)
|
||||
{
|
||||
val = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(&val,
|
||||
elf.buf + dhdr.sh_offset + sym.st_value,
|
||||
sizeof(val));
|
||||
Elf32_Shdr dhdr;
|
||||
int val = 0;
|
||||
char section_name[128];
|
||||
|
||||
parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
|
||||
|
||||
/* For explanation - refer to _MSC_VER version of code */
|
||||
strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
|
||||
/* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
|
||||
|
||||
if (strcmp(section_name, ".bss"))
|
||||
{
|
||||
if (sizeof(val) != sym.st_size)
|
||||
{
|
||||
/* The target value is declared as an int in
|
||||
* asm_*_offsets.c, which is 4 bytes on all
|
||||
* targets we currently use. Complain loudly if
|
||||
* this is not true.
|
||||
*/
|
||||
log_msg("Symbol size is wrong\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&val,
|
||||
elf.buf + dhdr.sh_offset + sym.st_value,
|
||||
sym.st_size);
|
||||
}
|
||||
|
||||
if (!elf.le_data)
|
||||
{
|
||||
log_msg("Big Endian data not supported yet!\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case OUTPUT_FMT_RVDS:
|
||||
printf("%-40s EQU %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_GAS:
|
||||
printf(".equ %-40s, %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
default:
|
||||
printf("%s = %d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* if (elf.bits == 64) */
|
||||
{
|
||||
Elf64_Shdr shdr;
|
||||
for (i = 0; i < elf.hdr64.e_shnum; i++)
|
||||
{
|
||||
parse_elf_section(&elf, i, NULL, &shdr);
|
||||
|
||||
if (!elf.le_data)
|
||||
{
|
||||
log_msg("Big Endian data not supported yet!\n");
|
||||
goto bail;
|
||||
}\
|
||||
if (shdr.sh_type == SHT_SYMTAB)
|
||||
{
|
||||
for (ofst = shdr.sh_offset;
|
||||
ofst < shdr.sh_offset + shdr.sh_size;
|
||||
ofst += shdr.sh_entsize)
|
||||
{
|
||||
Elf64_Sym sym;
|
||||
|
||||
switch (mode)
|
||||
parse_elf_symbol(&elf, ofst, NULL, &sym);
|
||||
|
||||
/* For all OBJECTS (data objects), extract the value from the
|
||||
* proper data segment.
|
||||
*/
|
||||
/* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
|
||||
log_msg("found data object %s\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name));
|
||||
*/
|
||||
|
||||
if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
|
||||
&& sym.st_size == 4)
|
||||
{
|
||||
case OUTPUT_FMT_RVDS:
|
||||
printf("%-40s EQU %5d\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_GAS:
|
||||
printf(".equ %-40s, %5d\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
default:
|
||||
printf("%s = %d\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
Elf64_Shdr dhdr;
|
||||
int val = 0;
|
||||
char section_name[128];
|
||||
|
||||
parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
|
||||
|
||||
/* For explanation - refer to _MSC_VER version of code */
|
||||
strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
|
||||
/* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
|
||||
|
||||
if ((strcmp(section_name, ".bss")))
|
||||
{
|
||||
if (sizeof(val) != sym.st_size)
|
||||
{
|
||||
/* The target value is declared as an int in
|
||||
* asm_*_offsets.c, which is 4 bytes on all
|
||||
* targets we currently use. Complain loudly if
|
||||
* this is not true.
|
||||
*/
|
||||
log_msg("Symbol size is wrong\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&val,
|
||||
elf.buf + dhdr.sh_offset + sym.st_value,
|
||||
sym.st_size);
|
||||
}
|
||||
|
||||
if (!elf.le_data)
|
||||
{
|
||||
log_msg("Big Endian data not supported yet!\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case OUTPUT_FMT_RVDS:
|
||||
printf("%-40s EQU %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_GAS:
|
||||
printf(".equ %-40s, %5d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
default:
|
||||
printf("%s = %d\n",
|
||||
parse_elf_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -454,7 +736,7 @@ int parse_elf32(uint8_t *buf, size_t sz, output_fmt_t mode)
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
log_msg("Parse error: File does not appear to be valid ELF32\n");
|
||||
log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -521,8 +803,7 @@ int main(int argc, char **argv)
|
||||
goto bail;
|
||||
}
|
||||
|
||||
res = parse_elf32(file_buf, stat_buf.st_size, mode);
|
||||
//res = parse_coff(file_buf, stat_buf.st_size);
|
||||
res = parse_elf(file_buf, stat_buf.st_size, mode);
|
||||
free(file_buf);
|
||||
|
||||
if (!res)
|
||||
@@ -535,7 +816,7 @@ bail:
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
/* See "Microsoft Portable Executable and Common Object File Format Specification"
|
||||
for reference.
|
||||
*/
|
||||
@@ -549,7 +830,6 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
unsigned int i;
|
||||
unsigned __int8 *ptr;
|
||||
unsigned __int32 symoffset;
|
||||
FILE *fp;
|
||||
|
||||
char **sectionlist; //this array holds all section names in their correct order.
|
||||
//it is used to check if the symbol is in .bss or .data section.
|
||||
@@ -560,9 +840,18 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
strtab_ptr = symtab_ptr + symtab_sz * 18;
|
||||
|
||||
if (nsections > 96)
|
||||
goto bail;
|
||||
{
|
||||
log_msg("Too many sections\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
sectionlist = malloc(nsections * sizeof * sectionlist);
|
||||
sectionlist = malloc(nsections * sizeof(sectionlist));
|
||||
|
||||
if (sectionlist == NULL)
|
||||
{
|
||||
log_msg("Allocating first level of section list failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
//log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
|
||||
|
||||
@@ -580,6 +869,12 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
//log_msg("COFF: Parsing section %s\n",sectionname);
|
||||
|
||||
sectionlist[i] = malloc(strlen(sectionname) + 1);
|
||||
|
||||
if (sectionlist[i] == NULL)
|
||||
{
|
||||
log_msg("Allocating storage for %s failed\n", sectionname);
|
||||
goto bail;
|
||||
}
|
||||
strcpy(sectionlist[i], sectionname);
|
||||
|
||||
if (!strcmp(sectionname, ".data")) sectionrawdata_ptr = get_le32(ptr + 20);
|
||||
@@ -590,14 +885,6 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
|
||||
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
|
||||
|
||||
fp = fopen("assembly_offsets.asm", "w");
|
||||
|
||||
if (fp == NULL)
|
||||
{
|
||||
perror("open file");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* The compiler puts the data with non-zero offset in .data section, but puts the data with
|
||||
zero offset in .bss section. So, if the data is in .bss section, set offset=0.
|
||||
Note from Wiki: In an object module compiled from C, the bss section contains
|
||||
@@ -631,13 +918,23 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
strncpy(name, ptr, 8);
|
||||
//log_msg("COFF: Parsing symbol %s\n",name);
|
||||
fprintf(fp, "%-40s EQU ", name);
|
||||
/* The 64bit Windows compiler doesn't prefix with an _.
|
||||
* Check what's there, and bump if necessary
|
||||
*/
|
||||
if (name[0] == '_')
|
||||
printf("%-40s EQU ", name + 1);
|
||||
else
|
||||
printf("%-40s EQU ", name);
|
||||
}
|
||||
else
|
||||
{
|
||||
//log_msg("COFF: Parsing symbol %s\n",
|
||||
// buf + strtab_ptr + get_le32(ptr+4));
|
||||
fprintf(fp, "%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
|
||||
if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
|
||||
printf("%-40s EQU ",
|
||||
buf + strtab_ptr + get_le32(ptr + 4) + 1);
|
||||
else
|
||||
printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
|
||||
}
|
||||
|
||||
if (!(strcmp(sectionlist[section-1], ".bss")))
|
||||
@@ -654,14 +951,13 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||
//log_msg(" Address: %u\n",get_le32(ptr+8));
|
||||
//log_msg(" Offset: %u\n", symoffset);
|
||||
|
||||
fprintf(fp, "%5d\n", symoffset);
|
||||
printf("%5d\n", symoffset);
|
||||
}
|
||||
|
||||
ptr += 18;
|
||||
}
|
||||
|
||||
fprintf(fp, " END\n");
|
||||
fclose(fp);
|
||||
printf(" END\n");
|
||||
|
||||
for (i = 0; i < nsections; i++)
|
||||
{
|
||||
@@ -711,11 +1007,7 @@ int main(int argc, char **argv)
|
||||
else
|
||||
f = argv[1];
|
||||
|
||||
if (_sopen_s(&fd, f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE))
|
||||
{
|
||||
perror("Unable to open file");
|
||||
goto bail;
|
||||
}
|
||||
fd = _sopen(f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE);
|
||||
|
||||
if (_fstat(fd, &stat_buf))
|
||||
{
|
||||
|
15
build/x86-msvs/obj_int_extract.bat
Normal file
15
build/x86-msvs/obj_int_extract.bat
Normal file
@@ -0,0 +1,15 @@
|
||||
REM Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
REM
|
||||
REM Use of this source code is governed by a BSD-style license
|
||||
REM that can be found in the LICENSE file in the root of the source
|
||||
REM tree. An additional intellectual property rights grant can be found
|
||||
REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/common/asm_com_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/asm_dec_offsets.c"
|
||||
cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/asm_enc_offsets.c"
|
||||
obj_int_extract.exe rvds "asm_com_offsets.obj" > "asm_com_offsets.asm"
|
||||
obj_int_extract.exe rvds "asm_dec_offsets.obj" > "asm_dec_offsets.asm"
|
||||
obj_int_extract.exe rvds "asm_enc_offsets.obj" > "asm_enc_offsets.asm"
|
86
libs.mk
86
libs.mk
@@ -9,7 +9,13 @@
|
||||
##
|
||||
|
||||
|
||||
ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm)
|
||||
# ARM assembly files are written in RVCT-style. We use some make magic to
|
||||
# filter those files to allow GCC compilation
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm)
|
||||
else
|
||||
ASM:=.asm
|
||||
endif
|
||||
|
||||
CODEC_SRCS-yes += libs.mk
|
||||
|
||||
@@ -126,6 +132,23 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
|
||||
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
||||
obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
|
||||
@cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat .
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--exe \
|
||||
--target=$(TOOLCHAIN) \
|
||||
--name=obj_int_extract \
|
||||
--ver=$(CONFIG_VS_VERSION) \
|
||||
--proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--out=$@ $^ \
|
||||
-I. \
|
||||
-I"$(SRC_PATH_BARE)" \
|
||||
|
||||
PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj
|
||||
PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
|
||||
|
||||
vpx.def: $(call enabled,CODEC_EXPORTS)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
|
||||
@@ -135,15 +158,16 @@ CLEAN-OBJS += vpx.def
|
||||
|
||||
vpx.vcproj: $(CODEC_SRCS) vpx.def
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
|
||||
--lib\
|
||||
--target=$(TOOLCHAIN)\
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--lib \
|
||||
--target=$(TOOLCHAIN) \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--name=vpx\
|
||||
--proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74\
|
||||
--module-def=vpx.def\
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $(CFLAGS) $^\
|
||||
--name=vpx \
|
||||
--proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \
|
||||
--module-def=vpx.def \
|
||||
--ver=$(CONFIG_VS_VERSION) \
|
||||
--out=$@ $(CFLAGS) $^ \
|
||||
--src-path-bare="$(SRC_PATH_BARE)" \
|
||||
|
||||
PROJECTS-$(BUILD_LIBVPX) += vpx.vcproj
|
||||
|
||||
@@ -207,36 +231,38 @@ endif
|
||||
#
|
||||
# Add assembler dependencies for configuration and offsets
|
||||
#
|
||||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %.asm.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
|
||||
#
|
||||
# Calculate platform- and compiler-specific offsets for hand coded assembly
|
||||
#
|
||||
ifeq ($(ARCH_ARM), yes)
|
||||
asm_com_offsets.asm: obj_int_extract
|
||||
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
ifeq ($(CONFIG_EXTERNAL_BUILD),) # Visual Studio uses obj_int_extract.bat
|
||||
ifeq ($(ARCH_ARM), yes)
|
||||
asm_com_offsets.asm: obj_int_extract
|
||||
asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
CLEAN-OBJS += asm_com_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
|
||||
OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
|
||||
CLEAN-OBJS += asm_com_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
|
||||
|
||||
ifeq ($(CONFIG_VP8_ENCODER), yes)
|
||||
asm_enc_offsets.asm: obj_int_extract
|
||||
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
ifeq ($(CONFIG_VP8_ENCODER), yes)
|
||||
asm_enc_offsets.asm: obj_int_extract
|
||||
asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
CLEAN-OBJS += asm_enc_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
|
||||
endif
|
||||
OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
CLEAN-OBJS += asm_enc_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP8_DECODER), yes)
|
||||
asm_dec_offsets.asm: obj_int_extract
|
||||
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
ifeq ($(CONFIG_VP8_DECODER), yes)
|
||||
asm_dec_offsets.asm: obj_int_extract
|
||||
asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
CLEAN-OBJS += asm_dec_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
|
||||
OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
CLEAN-OBJS += asm_dec_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@@ -13,8 +13,9 @@ vpx.sln: $(wildcard *.vcproj)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
|
||||
$(if $(filter %vpx.vcproj,$^),\
|
||||
$(foreach vcp,$(filter-out %vpx.vcproj,$^),\
|
||||
$(foreach vcp,$(filter-out %vpx.vcproj %obj_int_extract.vcproj,$^),\
|
||||
--dep=$(vcp:.vcproj=):vpx)) \
|
||||
--dep=vpx:obj_int_extract \
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $^
|
||||
vpx.sln.mk: vpx.sln
|
||||
|
@@ -20,7 +20,7 @@
|
||||
|
||||
extern void vp8_init_scan_order_mask();
|
||||
|
||||
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
{
|
||||
int i;
|
||||
vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
|
||||
@@ -119,7 +119,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
return 1;
|
||||
}
|
||||
|
||||
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -19,14 +19,6 @@
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
|
||||
|
||||
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -106,31 +98,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
rtcd->recon.recon2 = vp8_recon2b_neon;
|
||||
rtcd->recon.recon4 = vp8_recon4b_neon;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_neon;
|
||||
|
||||
rtcd->recon.build_intra_predictors_mby =
|
||||
vp8_build_intra_predictors_mby_neon;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (has_media)
|
||||
#endif
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (has_neon)
|
||||
#endif
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr =
|
||||
vp8_build_intra_predictors_mby_neon;
|
||||
vp8_build_intra_predictors_mby_s_ptr =
|
||||
vp8_build_intra_predictors_mby_s_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@@ -53,6 +53,9 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
|
||||
|
||||
extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_neon);
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mby_s_neon);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_recon
|
||||
#define vp8_recon_recon vp8_recon_b_neon
|
||||
@@ -74,6 +77,13 @@ extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
#undef vp8_recon_recon_mb
|
||||
#define vp8_recon_recon_mb vp8_recon_mb_neon
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mby
|
||||
#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby_neon
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mby_s
|
||||
#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@@ -38,7 +38,7 @@ DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
void vp8_filter_block2d_first_pass
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
@@ -82,7 +82,7 @@ void vp8_filter_block2d_first_pass
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_second_pass
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
@@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
|
||||
}
|
||||
|
||||
|
||||
void vp8_filter_block2d
|
||||
static void filter_block2d
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
@@ -142,39 +142,13 @@ void vp8_filter_block2d
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_block_variation_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int *HVar,
|
||||
int *VVar
|
||||
)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *Ptr = src_ptr;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
*HVar += abs((int)Ptr[j] - (int)Ptr[j+1]);
|
||||
*VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]);
|
||||
}
|
||||
|
||||
Ptr += src_pixels_per_line;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void vp8_sixtap_predict_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
@@ -191,7 +165,7 @@ void vp8_sixtap_predict_c
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
@@ -211,11 +185,11 @@ void vp8_sixtap_predict8x8_c
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
@@ -237,11 +211,11 @@ void vp8_sixtap_predict8x4_c
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
@@ -264,10 +238,10 @@ void vp8_sixtap_predict16x16_c
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
@@ -294,7 +268,7 @@ void vp8_sixtap_predict16x16_c
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_first_pass
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
@@ -345,7 +319,7 @@ void vp8_filter_block2d_bil_first_pass
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_second_pass
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
@@ -399,7 +373,7 @@ void vp8_filter_block2d_bil_second_pass
|
||||
* SPECIAL NOTES : The largest block size can be handled here is 16x16
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil
|
||||
static void filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
@@ -415,10 +389,10 @@ void vp8_filter_block2d_bil
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
@@ -444,19 +418,19 @@ void vp8_bilinear_predict4x4_c
|
||||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
@@ -476,7 +450,7 @@ void vp8_bilinear_predict8x8_c
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
@@ -496,7 +470,7 @@ void vp8_bilinear_predict8x4_c
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
@@ -516,5 +490,5 @@ void vp8_bilinear_predict16x16_c
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
||||
|
@@ -11,6 +11,13 @@
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
const unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
|
@@ -70,4 +70,6 @@ const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);
|
||||
|
||||
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);
|
||||
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
#endif
|
||||
|
@@ -20,12 +20,6 @@
|
||||
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
|
||||
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -45,6 +39,10 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->recon.recon4 = vp8_recon4b_c;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_c;
|
||||
rtcd->recon.recon_mby = vp8_recon_mby_c;
|
||||
rtcd->recon.build_intra_predictors_mby =
|
||||
vp8_build_intra_predictors_mby;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s;
|
||||
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
|
||||
@@ -75,9 +73,6 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/* Pure C: */
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_arch_x86_common_init(ctx);
|
||||
|
@@ -17,7 +17,7 @@ typedef enum
|
||||
DEST = 1
|
||||
} BLOCKSET;
|
||||
|
||||
void vp8_setup_block
|
||||
static void setup_block
|
||||
(
|
||||
BLOCKD *b,
|
||||
int mv_stride,
|
||||
@@ -43,7 +43,8 @@ void vp8_setup_block
|
||||
|
||||
}
|
||||
|
||||
void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
|
||||
static void setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
{
|
||||
int block;
|
||||
|
||||
@@ -64,16 +65,16 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
|
||||
setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
|
||||
setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
|
||||
vp8_setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
|
||||
setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
}
|
||||
}
|
||||
@@ -124,6 +125,6 @@ void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
|
||||
/* handle the destination pitch features */
|
||||
vp8_setup_macroblock(x, DEST);
|
||||
vp8_setup_macroblock(x, PRED);
|
||||
setup_macroblock(x, DEST);
|
||||
setup_macroblock(x, PRED);
|
||||
}
|
||||
|
@@ -211,7 +211,7 @@ void vp8_post_proc_down_and_across_c
|
||||
}
|
||||
}
|
||||
|
||||
int vp8_q2mbl(int x)
|
||||
static int q2mbl(int x)
|
||||
{
|
||||
if (x < 20) x = 20;
|
||||
|
||||
@@ -314,8 +314,8 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
|
||||
(void) flag;
|
||||
|
||||
POSTPROC_INVOKE(rtcd, downacross)(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl);
|
||||
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
|
||||
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
|
||||
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
|
||||
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
|
||||
|
||||
POSTPROC_INVOKE(rtcd, downacross)(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
|
||||
POSTPROC_INVOKE(rtcd, downacross)(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
|
||||
|
@@ -23,6 +23,9 @@
|
||||
#define prototype_recon_macroblock(sym) \
|
||||
void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
|
||||
|
||||
#define prototype_build_intra_predictors(sym) \
|
||||
void sym(MACROBLOCKD *x)
|
||||
|
||||
struct vp8_recon_rtcd_vtable;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
@@ -73,9 +76,23 @@ extern prototype_recon_macroblock(vp8_recon_recon_mb);
|
||||
#endif
|
||||
extern prototype_recon_macroblock(vp8_recon_recon_mby);
|
||||
|
||||
#ifndef vp8_recon_build_intra_predictors_mby
|
||||
#define vp8_recon_build_intra_predictors_mby vp8_build_intra_predictors_mby
|
||||
#endif
|
||||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mby);
|
||||
|
||||
#ifndef vp8_recon_build_intra_predictors_mby_s
|
||||
#define vp8_recon_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s
|
||||
#endif
|
||||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mby_s);
|
||||
|
||||
|
||||
typedef prototype_copy_block((*vp8_copy_block_fn_t));
|
||||
typedef prototype_recon_block((*vp8_recon_fn_t));
|
||||
typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
|
||||
typedef prototype_build_intra_predictors((*vp8_build_intra_pred_fn_t));
|
||||
typedef struct vp8_recon_rtcd_vtable
|
||||
{
|
||||
vp8_copy_block_fn_t copy16x16;
|
||||
@@ -86,6 +103,8 @@ typedef struct vp8_recon_rtcd_vtable
|
||||
vp8_recon_fn_t recon4;
|
||||
vp8_recon_mb_fn_t recon_mb;
|
||||
vp8_recon_mb_fn_t recon_mby;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mby_s;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mby;
|
||||
} vp8_recon_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@@ -168,7 +168,7 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
@@ -187,7 +187,7 @@ void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
@@ -246,7 +246,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 8);
|
||||
build_inter_predictors2b(x, d0, 8);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
|
||||
@@ -291,7 +291,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
BLOCKD *d = &x->block[bbb[i]];
|
||||
vp8_build_inter_predictors4b(x, d, 16);
|
||||
build_inter_predictors4b(x, d, 16);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -303,7 +303,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 16);
|
||||
build_inter_predictors2b(x, d0, 16);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
|
||||
@@ -372,7 +372,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
BLOCKD *d = &x->block[bbb[i]];
|
||||
vp8_build_inter_predictors4b(x, d, 16);
|
||||
build_inter_predictors4b(x, d, 16);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -383,7 +383,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 16);
|
||||
build_inter_predictors2b(x, d0, 16);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
|
||||
@@ -400,7 +400,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 8);
|
||||
build_inter_predictors2b(x, d0, 8);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
|
||||
@@ -600,7 +600,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
BLOCKD *d = &x->block[bbb[i]];
|
||||
/*vp8_build_inter_predictors4b(x, d, 16);*/
|
||||
/*build_inter_predictors4b(x, d, 16);*/
|
||||
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
@@ -630,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
{
|
||||
/*vp8_build_inter_predictors2b(x, d0, 16);*/
|
||||
/*build_inter_predictors2b(x, d0, 16);*/
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
unsigned char *pred_ptr = d0->predictor;
|
||||
@@ -662,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
{
|
||||
/*vp8_build_inter_predictors2b(x, d0, 8);*/
|
||||
/*build_inter_predictors2b(x, d0, 8);*/
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
unsigned char *pred_ptr = d0->predictor;
|
||||
|
@@ -14,13 +14,6 @@
|
||||
|
||||
extern void init_intra_left_above_pixels(MACROBLOCKD *x);
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
|
||||
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
|
||||
|
||||
extern void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x);
|
||||
|
||||
|
@@ -38,6 +38,7 @@
|
||||
#define pthread_self() GetCurrentThreadId()
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/semaphore.h>
|
||||
#include <mach/task.h>
|
||||
#include <time.h>
|
||||
|
@@ -113,97 +113,6 @@ nextrow:
|
||||
ret
|
||||
|
||||
|
||||
;
|
||||
; THIS FUNCTION APPEARS TO BE UNUSED
|
||||
;
|
||||
;void vp8_filter_block1d_v6_mmx
|
||||
;(
|
||||
; short *src_ptr,
|
||||
; unsigned char *output_ptr,
|
||||
; unsigned int pixels_per_line,
|
||||
; unsigned int pixel_step,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width,
|
||||
; short * vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1d_v6_mmx)
|
||||
sym(vp8_filter_block1d_v6_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movq mm5, [GLOBAL(rd)]
|
||||
push rbx
|
||||
mov rbx, arg(6) ;vp8_filter
|
||||
movq mm1, [rbx + 16] ; do both the negative taps first!!!
|
||||
movq mm2, [rbx + 32] ;
|
||||
movq mm6, [rbx + 48] ;
|
||||
movq mm7, [rbx + 64] ;
|
||||
|
||||
movsxd rdx, dword ptr arg(2) ;pixels_per_line
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
sub rsi, rdx
|
||||
sub rsi, rdx
|
||||
movsxd rcx, DWORD PTR arg(4) ;output_height
|
||||
movsxd rax, DWORD PTR arg(5) ;output_width ; destination pitch?
|
||||
pxor mm0, mm0 ; mm0 = 00000000
|
||||
|
||||
|
||||
nextrow_v:
|
||||
movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1
|
||||
pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
|
||||
|
||||
|
||||
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
|
||||
pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
|
||||
pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi] ; mm4 = p0..p3 = row -2
|
||||
pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
|
||||
add rsi, rdx ; move source forward 1 line to avoid 3 * pitch
|
||||
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
|
||||
pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
|
||||
pmullw mm4, [rbx +80] ; mm4 *= kernel 3 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
|
||||
paddsw mm3, mm5 ; mm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
|
||||
packuswb mm3, mm0 ; pack and saturate
|
||||
|
||||
movd [rdi],mm3 ; store the results in the destination
|
||||
|
||||
add rdi,rax;
|
||||
|
||||
dec rcx ; decrement count
|
||||
jnz nextrow_v ; next row
|
||||
|
||||
pop rbx
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_filter_block1dc_v6_mmx
|
||||
;(
|
||||
; short *src_ptr,
|
||||
|
@@ -228,15 +228,8 @@ unsigned int vp8_mv_cont_count[5][4] =
|
||||
};
|
||||
#endif
|
||||
|
||||
unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1};
|
||||
unsigned char vp8_mbsplit_fill_offset[4][16] = {
|
||||
static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1};
|
||||
static const unsigned char mbsplit_fill_offset[4][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
|
||||
{ 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
|
||||
@@ -246,7 +239,7 @@ unsigned char vp8_mbsplit_fill_offset[4][16] = {
|
||||
|
||||
|
||||
|
||||
void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
|
||||
static void mb_mode_mv_init(VP8D_COMP *pbi)
|
||||
{
|
||||
vp8_reader *const bc = & pbi->bc;
|
||||
MV_CONTEXT *const mvc = pbi->common.fc.mvc;
|
||||
@@ -287,7 +280,7 @@ void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
|
||||
static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
|
||||
int mb_row, int mb_col)
|
||||
{
|
||||
const MV Zero = { 0, 0};
|
||||
@@ -405,10 +398,10 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
|
||||
/* Fill (uniform) modes, mvs of jth subset.
|
||||
Must do it here because ensuing subsets can
|
||||
refer back to us via "left" or "above". */
|
||||
unsigned char *fill_offset;
|
||||
unsigned int fill_count = vp8_mbsplit_fill_count[s];
|
||||
const unsigned char *fill_offset;
|
||||
unsigned int fill_count = mbsplit_fill_count[s];
|
||||
|
||||
fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]];
|
||||
fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]];
|
||||
|
||||
do {
|
||||
mi->bmi[ *fill_offset] = bmi;
|
||||
@@ -525,7 +518,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
||||
MODE_INFO *mi = pbi->common.mi;
|
||||
int mb_row = -1;
|
||||
|
||||
vp8_mb_mode_mv_init(pbi);
|
||||
mb_mode_mv_init(pbi);
|
||||
|
||||
while (++mb_row < pbi->common.mb_rows)
|
||||
{
|
||||
@@ -543,11 +536,11 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
||||
|
||||
while (++mb_col < pbi->common.mb_cols)
|
||||
{
|
||||
/*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
|
||||
/*read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
|
||||
if(pbi->common.frame_type == KEY_FRAME)
|
||||
vp8_kfread_modes(pbi, mi, mb_row, mb_col);
|
||||
else
|
||||
vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
|
||||
read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
|
||||
|
||||
mi++; /* next macroblock */
|
||||
}
|
||||
|
@@ -115,8 +115,8 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
{
|
||||
|
||||
vp8_build_intra_predictors_mbuv_s(xd);
|
||||
vp8_build_intra_predictors_mby_s_ptr(xd);
|
||||
|
||||
RECON_INVOKE(&pbi->common.rtcd.recon,
|
||||
build_intra_predictors_mby_s)(xd);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -175,7 +175,7 @@ void clamp_mvs(MACROBLOCKD *xd)
|
||||
|
||||
}
|
||||
|
||||
void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
{
|
||||
int eobtotal = 0;
|
||||
int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
@@ -214,7 +214,8 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode != B_PRED)
|
||||
{
|
||||
vp8_build_intra_predictors_mby_ptr(xd);
|
||||
RECON_INVOKE(&pbi->common.rtcd.recon,
|
||||
build_intra_predictors_mby)(xd);
|
||||
} else {
|
||||
vp8_intra_prediction_down_copy(xd);
|
||||
}
|
||||
@@ -319,10 +320,8 @@ FILE *vpxlog = 0;
|
||||
|
||||
|
||||
|
||||
void vp8_decode_mb_row(VP8D_COMP *pbi,
|
||||
VP8_COMMON *pc,
|
||||
int mb_row,
|
||||
MACROBLOCKD *xd)
|
||||
static void
|
||||
decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd)
|
||||
{
|
||||
|
||||
int i;
|
||||
@@ -394,7 +393,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
|
||||
else
|
||||
pbi->debugoutput =0;
|
||||
*/
|
||||
vp8_decode_macroblock(pbi, xd);
|
||||
decode_macroblock(pbi, xd);
|
||||
|
||||
/* check if the boolean decoder has suffered an error */
|
||||
xd->corrupted |= vp8dx_bool_error(xd->current_bc);
|
||||
@@ -900,7 +899,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
ibc = 0;
|
||||
}
|
||||
|
||||
vp8_decode_mb_row(pbi, pc, mb_row, xd);
|
||||
decode_mb_row(pbi, pc, mb_row, xd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -19,7 +19,13 @@
|
||||
#define BOOL_DATA UINT8
|
||||
|
||||
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
|
||||
DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
|
||||
DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
|
||||
{
|
||||
0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X,
|
||||
6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X,
|
||||
6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
|
||||
6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
|
||||
};
|
||||
#define EOB_CONTEXT_NODE 0
|
||||
#define ZERO_CONTEXT_NODE 1
|
||||
#define ONE_CONTEXT_NODE 2
|
||||
@@ -135,7 +141,7 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
Prob = coef_probs; \
|
||||
if(c<15) {\
|
||||
++c; \
|
||||
Prob += vp8_coef_bands_x[c]; \
|
||||
Prob += coef_bands_x[c]; \
|
||||
goto branch; \
|
||||
} goto BLOCK_FINISHED; /*for malformed input */\
|
||||
} \
|
||||
@@ -244,7 +250,7 @@ BLOCK_LOOP:
|
||||
Prob += v * ENTROPY_NODES;
|
||||
|
||||
DO_WHILE:
|
||||
Prob += vp8_coef_bands_x[c];
|
||||
Prob += coef_bands_x[c];
|
||||
DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
|
||||
|
||||
CHECK_0_:
|
||||
|
@@ -37,43 +37,6 @@
|
||||
extern void vp8_init_loop_filter(VP8_COMMON *cm);
|
||||
extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
|
||||
{
|
||||
FILE *yuv_file = fopen((char *)name, "ab");
|
||||
unsigned char *src = s->y_buffer;
|
||||
int h = s->y_height;
|
||||
|
||||
do
|
||||
{
|
||||
fwrite(src, s->y_width, 1, yuv_file);
|
||||
src += s->y_stride;
|
||||
}
|
||||
while (--h);
|
||||
|
||||
src = s->u_buffer;
|
||||
h = s->uv_height;
|
||||
|
||||
do
|
||||
{
|
||||
fwrite(src, s->uv_width, 1, yuv_file);
|
||||
src += s->uv_stride;
|
||||
}
|
||||
while (--h);
|
||||
|
||||
src = s->v_buffer;
|
||||
h = s->uv_height;
|
||||
|
||||
do
|
||||
{
|
||||
fwrite(src, s->uv_width, 1, yuv_file);
|
||||
src += s->uv_stride;
|
||||
}
|
||||
while (--h);
|
||||
|
||||
fclose(yuv_file);
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8dx_initialize()
|
||||
{
|
||||
@@ -155,35 +118,6 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
|
||||
}
|
||||
|
||||
|
||||
void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
|
||||
{
|
||||
VP8D_COMP *pbi = (VP8D_COMP *) comp;
|
||||
|
||||
(void) pbi;
|
||||
(void) x;
|
||||
|
||||
switch (oxst)
|
||||
{
|
||||
case VP8D_OK:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
|
||||
{
|
||||
VP8D_COMP *pbi = (VP8D_COMP *) comp;
|
||||
|
||||
(void) pbi;
|
||||
|
||||
switch (oxst)
|
||||
{
|
||||
case VP8D_OK:
|
||||
break;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
|
||||
{
|
||||
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
|
||||
@@ -203,6 +137,8 @@ int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
|
||||
{
|
||||
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
|
||||
@@ -459,12 +395,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* DEBUG code */
|
||||
/*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/
|
||||
if (cm->current_video_frame <= 5)
|
||||
write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
|
||||
#endif
|
||||
|
||||
vp8_clear_system_state();
|
||||
|
||||
|
@@ -113,6 +113,7 @@ typedef struct VP8Decompressor
|
||||
pthread_t *h_decoding_thread;
|
||||
sem_t *h_event_start_decoding;
|
||||
sem_t h_event_end_decoding;
|
||||
sem_t *h_mb_counter;
|
||||
/* end of threading data */
|
||||
#endif
|
||||
|
||||
|
@@ -12,9 +12,6 @@
|
||||
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#endif
|
||||
#include "onyxd_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vp8/common/threading.h"
|
||||
@@ -36,7 +33,7 @@ extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
|
||||
#define RTCD_VTABLE(x) NULL
|
||||
#endif
|
||||
|
||||
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
|
||||
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
|
||||
{
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
int i, j;
|
||||
@@ -90,7 +87,7 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
|
||||
static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
|
||||
{
|
||||
int eobtotal = 0;
|
||||
int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
|
||||
@@ -217,7 +214,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
|
||||
}
|
||||
|
||||
|
||||
THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
static THREAD_FUNCTION thread_decoding_proc(void *p_data)
|
||||
{
|
||||
int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
|
||||
VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
|
||||
@@ -278,6 +275,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
|
||||
for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
|
||||
{
|
||||
/*
|
||||
if ((mb_col & (nsync-1)) == 0)
|
||||
{
|
||||
while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
|
||||
@@ -286,6 +284,8 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
thread_sleep(0);
|
||||
}
|
||||
}
|
||||
*/
|
||||
sem_wait(&pbi->h_mb_counter[ithread]);
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
|
||||
{
|
||||
@@ -296,18 +296,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
}
|
||||
}
|
||||
|
||||
if(pbi->common.filter_level)
|
||||
{
|
||||
/*update loopfilter info*/
|
||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
}
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
*/
|
||||
@@ -333,7 +321,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
|
||||
|
||||
vp8_build_uvmvs(xd, pc->full_pixel);
|
||||
vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
|
||||
decode_macroblock(pbi, xd, mb_row, mb_col);
|
||||
|
||||
if (pbi->common.filter_level)
|
||||
{
|
||||
@@ -362,7 +350,16 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
}
|
||||
}
|
||||
|
||||
/* loopfilter on this macroblock. */
|
||||
/* update loopfilter info */
|
||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
|
||||
/* loopfilter on this macroblock. */
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
@@ -389,6 +386,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
|
||||
|
||||
/*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
|
||||
pbi->mt_current_mb_col[mb_row] = mb_col;
|
||||
|
||||
if (mb_row != pbi->common.mb_rows-1)
|
||||
sem_post(&pbi->h_mb_counter[ithread+1]);
|
||||
}
|
||||
|
||||
/* adjust to the next row of mbs */
|
||||
@@ -444,6 +444,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
||||
|
||||
CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
|
||||
CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
|
||||
CHECK_MEM_ERROR(pbi->h_mb_counter, vpx_malloc(sizeof(sem_t) * (pbi->decoding_thread_count + 1)));
|
||||
CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
|
||||
vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
|
||||
CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
|
||||
@@ -456,9 +457,12 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
||||
pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
|
||||
pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];
|
||||
|
||||
pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
|
||||
pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
|
||||
}
|
||||
|
||||
for (ithread = 0; ithread < pbi->decoding_thread_count + 1; ithread++)
|
||||
sem_init(&pbi->h_mb_counter[ithread], 0, 0);
|
||||
|
||||
sem_init(&pbi->h_event_end_decoding, 0, 0);
|
||||
|
||||
pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
|
||||
@@ -621,6 +625,9 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
sem_destroy(&pbi->h_event_start_decoding[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < pbi->decoding_thread_count + 1; i++)
|
||||
sem_destroy(&pbi->h_mb_counter[i]);
|
||||
|
||||
sem_destroy(&pbi->h_event_end_decoding);
|
||||
|
||||
vpx_free(pbi->h_decoding_thread);
|
||||
@@ -629,6 +636,11 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
vpx_free(pbi->h_event_start_decoding);
|
||||
pbi->h_event_start_decoding = NULL;
|
||||
|
||||
if (pbi->h_mb_counter)
|
||||
{
|
||||
vpx_free(pbi->h_mb_counter);
|
||||
pbi->h_mb_counter = NULL;
|
||||
}
|
||||
vpx_free(pbi->mb_row_di);
|
||||
pbi->mb_row_di = NULL ;
|
||||
|
||||
@@ -638,7 +650,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
|
||||
}
|
||||
|
||||
|
||||
void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
||||
static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
||||
{
|
||||
VP8_COMMON *cm = &pbi->common;
|
||||
MACROBLOCKD *mbd = &pbi->mb;
|
||||
@@ -721,14 +733,17 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
|
||||
vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
|
||||
}
|
||||
vp8mt_lpf_init(pbi, pc->filter_level);
|
||||
lpf_init(pbi, pc->filter_level);
|
||||
}
|
||||
|
||||
vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
|
||||
setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
|
||||
|
||||
|
||||
|
||||
for (i = 0; i < pbi->decoding_thread_count; i++)
|
||||
sem_post(&pbi->h_event_start_decoding[i]);
|
||||
|
||||
|
||||
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
|
||||
{
|
||||
|
||||
@@ -761,6 +776,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
|
||||
{
|
||||
/*
|
||||
if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
|
||||
while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
|
||||
{
|
||||
@@ -768,6 +784,9 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
thread_sleep(0);
|
||||
}
|
||||
}
|
||||
*/
|
||||
if(mb_row > 0)
|
||||
sem_wait(&pbi->h_mb_counter[pbi->decoding_thread_count]);
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
|
||||
{
|
||||
@@ -778,18 +797,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
}
|
||||
}
|
||||
|
||||
if(pbi->common.filter_level)
|
||||
{
|
||||
/* update loopfilter info */
|
||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
}
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
*/
|
||||
@@ -821,7 +828,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
}
|
||||
|
||||
vp8_build_uvmvs(xd, pc->full_pixel);
|
||||
vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
|
||||
decode_macroblock(pbi, xd, mb_row, mb_col);
|
||||
|
||||
/* check if the boolean decoder has suffered an error */
|
||||
xd->corrupted |= vp8dx_bool_error(xd->current_bc);
|
||||
@@ -853,6 +860,15 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
}
|
||||
}
|
||||
|
||||
/* update loopfilter info */
|
||||
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = pbi->mt_baseline_filter_level[Segment];
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
|
||||
|
||||
/* loopfilter on this macroblock. */
|
||||
if (filter_level)
|
||||
{
|
||||
@@ -879,6 +895,10 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
xd->above_context++;
|
||||
|
||||
pbi->mt_current_mb_col[mb_row] = mb_col;
|
||||
|
||||
/* macroblock counter */
|
||||
if (mb_row != pbi->common.mb_rows-1)
|
||||
sem_post(&pbi->h_mb_counter[0]);
|
||||
}
|
||||
|
||||
/* adjust to the next row of mbs */
|
||||
|
@@ -17,7 +17,7 @@
|
||||
#if HAVE_MMX
|
||||
void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
|
||||
|
||||
void vp8_dequantize_b_mmx(BLOCKD *d)
|
||||
static void dequantize_b_mmx(BLOCKD *d)
|
||||
{
|
||||
short *sq = (short *) d->qcoeff;
|
||||
short *dq = (short *) d->dqcoeff;
|
||||
@@ -41,7 +41,7 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
|
||||
#if HAVE_MMX
|
||||
if (flags & HAS_MMX)
|
||||
{
|
||||
pbi->dequant.block = vp8_dequantize_b_mmx;
|
||||
pbi->dequant.block = dequantize_b_mmx;
|
||||
pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
|
||||
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
|
||||
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
|
||||
|
@@ -35,23 +35,23 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
|
||||
cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/
|
||||
|
||||
/*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;
|
||||
cpi->rtcd.variance.var8x8 = vp8_variance8x8_c;
|
||||
cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
|
||||
/*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/
|
||||
cpi->rtcd.variance.var8x8 = vp8_variance8x8_armv6;
|
||||
/*cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
|
||||
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;*/
|
||||
cpi->rtcd.variance.var16x16 = vp8_variance16x16_armv6;
|
||||
|
||||
/*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
|
||||
cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
|
||||
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
|
||||
/*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/
|
||||
cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_armv6;
|
||||
/*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6;
|
||||
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6;
|
||||
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6;
|
||||
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6;
|
||||
|
||||
/*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
|
||||
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
|
||||
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6;
|
||||
/*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
|
||||
|
||||
/*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
|
||||
cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
|
||||
@@ -59,9 +59,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
|
||||
|
||||
/*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;*/
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;*/
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_armv6;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_armv6;
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6;
|
||||
|
||||
/*cpi->rtcd.encodemb.berr = vp8_block_error_c;
|
||||
@@ -71,8 +71,8 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.encodemb.submby = vp8_subtract_mby_c;
|
||||
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/
|
||||
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;*/
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_armv6;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
262
vp8/encoder/arm/armv6/vp8_fast_fdct4x4_armv6.asm
Normal file
262
vp8/encoder/arm/armv6/vp8_fast_fdct4x4_armv6.asm
Normal file
@@ -0,0 +1,262 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_fast_fdct4x4_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
; void vp8_fast_fdct4x4_armv6(short *input, short *output, int pitch)
|
||||
; vp8_fast_fdct4x4_armv6: 4x4 forward transform computed in two passes.
;   r0 = input, r1 = output, r2 = pitch (added to r0 as a byte offset
;   between rows).
; PART 1 transforms the four input rows. The [o1|o0]-type word of each row
;   stays in a register (r3, r9, r2, r0) and the [o3|o2]-type word is parked
;   in the output buffer at byte offsets 4, 12, 20 and 28.
; PART 2 transforms the columns of those intermediate values, two columns
;   per packed word, and writes the final 16 coefficients to output.
; qadd16/qsub16 are the saturating halfword forms, so sums are clamped to
;   16 bits rather than wrapping.
; NOTE(review): ldrd is used for the row loads, so input rows are presumably
;   expected to be suitably aligned - confirm against the callers.
|vp8_fast_fdct4x4_armv6| PROC
|
||||
|
||||
stmfd sp!, {r4 - r12, lr}
|
||||
|
||||
; PART 1
|
||||
|
||||
; coeffs 0-3
|
||||
ldrd r4, r5, [r0] ; [i1 | i0] [i3 | i2]
|
||||
|
||||
ldr r10, c7500
|
||||
ldr r11, c14500
|
||||
ldr r12, c0x22a453a0 ; [2217*4 | 5352*4]
|
||||
ldr lr, c0x00080008 ; [8 | 8], multiplier for smuad/smusd
|
||||
ror r5, r5, #16 ; [i2 | i3]
|
||||
|
||||
qadd16 r6, r4, r5 ; [i1+i2 | i0+i3] = [b1 | a1] without shift
|
||||
qsub16 r7, r4, r5 ; [i1-i2 | i0-i3] = [c1 | d1] without shift
|
||||
|
||||
add r0, r0, r2 ; update input pointer
|
||||
|
||||
qadd16 r7, r7, r7 ; 2*[c1|d1] --> we can use smlad and smlsd
|
||||
; with 2217*4 and 5352*4 without losing the
|
||||
; sign bit (overflow)
|
||||
|
||||
smuad r4, r6, lr ; o0 = (i1+i2)*8 + (i0+i3)*8
|
||||
smusd r5, r6, lr ; o2 = (i0+i3)*8 - (i1+i2)*8 (bottom product minus top product)
|
||||
|
||||
smlad r6, r7, r12, r11 ; o1 = (c1 * 2217 + d1 * 5352 + 14500)
|
||||
smlsdx r7, r7, r12, r10 ; o3 = (d1 * 2217 - c1 * 5352 + 7500)
|
||||
|
||||
ldrd r8, r9, [r0] ; [i5 | i4] [i7 | i6]
|
||||
|
||||
pkhbt r3, r4, r6, lsl #4 ; [o1 | o0], keep in register for PART 2
|
||||
pkhbt r6, r5, r7, lsl #4 ; [o3 | o2]
|
||||
|
||||
str r6, [r1, #4] ; park [o3 | o2] in output until PART 2 reloads it
|
||||
|
||||
; coeffs 4-7
|
||||
ror r9, r9, #16 ; [i6 | i7]
|
||||
|
||||
qadd16 r6, r8, r9 ; [i5+i6 | i4+i7] = [b1 | a1] without shift
|
||||
qsub16 r7, r8, r9 ; [i5-i6 | i4-i7] = [c1 | d1] without shift
|
||||
|
||||
add r0, r0, r2 ; update input pointer
|
||||
|
||||
qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd
|
||||
; with 2217*4 and 5352*4 without losing the
|
||||
; sign bit (overflow)
|
||||
|
||||
smuad r9, r6, lr ; o4 = (i5+i6)*8 + (i4+i7)*8
|
||||
smusd r8, r6, lr ; o6 = (i4+i7)*8 - (i5+i6)*8 (bottom product minus top product)
|
||||
|
||||
smlad r6, r7, r12, r11 ; o5 = (c1 * 2217 + d1 * 5352 + 14500)
|
||||
smlsdx r7, r7, r12, r10 ; o7 = (d1 * 2217 - c1 * 5352 + 7500)
|
||||
|
||||
ldrd r4, r5, [r0] ; [i9 | i8] [i11 | i10]
|
||||
|
||||
pkhbt r9, r9, r6, lsl #4 ; [o5 | o4], keep in register for PART 2
|
||||
pkhbt r6, r8, r7, lsl #4 ; [o7 | o6]
|
||||
|
||||
str r6, [r1, #12] ; park [o7 | o6] in output until PART 2 reloads it
|
||||
|
||||
; coeffs 8-11
|
||||
ror r5, r5, #16 ; [i10 | i11]
|
||||
|
||||
qadd16 r6, r4, r5 ; [i9+i10 | i8+i11]=[b1 | a1] without shift
|
||||
qsub16 r7, r4, r5 ; [i9-i10 | i8-i11]=[c1 | d1] without shift
|
||||
|
||||
add r0, r0, r2 ; update input pointer
|
||||
|
||||
qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd
|
||||
; with 2217*4 and 5352*4 without losing the
|
||||
; sign bit (overflow)
|
||||
|
||||
smuad r2, r6, lr ; o8 = (i9+i10)*8 + (i8+i11)*8
|
||||
smusd r8, r6, lr ; o10 = (i8+i11)*8 - (i9+i10)*8 (bottom product minus top product)
|
||||
|
||||
smlad r6, r7, r12, r11 ; o9 = (c1 * 2217 + d1 * 5352 + 14500)
|
||||
smlsdx r7, r7, r12, r10 ; o11 = (d1 * 2217 - c1 * 5352 + 7500)
|
||||
|
||||
ldrd r4, r5, [r0] ; [i13 | i12] [i15 | i14]
|
||||
|
||||
pkhbt r2, r2, r6, lsl #4 ; [o9 | o8], keep in register for PART 2
|
||||
pkhbt r6, r8, r7, lsl #4 ; [o11 | o10]
|
||||
|
||||
str r6, [r1, #20] ; park [o11 | o10] in output until PART 2 reloads it
|
||||
|
||||
; coeffs 12-15
|
||||
ror r5, r5, #16 ; [i14 | i15]
|
||||
|
||||
qadd16 r6, r4, r5 ; [i13+i14 | i12+i15]=[b1|a1] without shift
|
||||
qsub16 r7, r4, r5 ; [i13-i14 | i12-i15]=[c1|d1] without shift
|
||||
|
||||
qadd16 r7, r7, r7 ; 2x[c1|d1] --> we can use smlad and smlsd
|
||||
; with 2217*4 and 5352*4 without losing the
|
||||
; sign bit (overflow)
|
||||
|
||||
smuad r4, r6, lr ; o12 = (i13+i14)*8 + (i12+i15)*8
|
||||
smusd r5, r6, lr ; o14 = (i12+i15)*8 - (i13+i14)*8 (bottom product minus top product)
|
||||
|
||||
smlad r6, r7, r12, r11 ; o13 = (c1 * 2217 + d1 * 5352 + 14500)
|
||||
smlsdx r7, r7, r12, r10 ; o15 = (d1 * 2217 - c1 * 5352 + 7500)
|
||||
|
||||
pkhbt r0, r4, r6, lsl #4 ; [o13 | o12], keep in register for PART 2
|
||||
pkhbt r6, r5, r7, lsl #4 ; [o15 | o14]
|
||||
|
||||
str r6, [r1, #28] ; park [o15 | o14] in output until PART 2 reloads it
|
||||
|
||||
|
||||
; PART 2 -------------------------------------------------
; In the comments below "i" denotes the PART 1 results (the intermediate
; values), not the original input samples.
|
||||
ldr r11, c12000
|
||||
ldr r10, c51000
|
||||
ldr lr, c0x00070007 ; [7 | 7] rounding term for the >>4 outputs
|
||||
|
||||
qadd16 r4, r3, r0 ; a1 = [i1+i13 | i0+i12]
|
||||
qadd16 r5, r9, r2 ; b1 = [i5+i9 | i4+i8]
|
||||
qsub16 r6, r9, r2 ; c1 = [i5-i9 | i4-i8]
|
||||
qsub16 r7, r3, r0 ; d1 = [i1-i13 | i0-i12]
|
||||
|
||||
qadd16 r4, r4, lr ; a1 + 7
|
||||
|
||||
add r0, r11, #0x10000 ; 12000 + (1 << 16): rounding term that also adds 1 after the >>16, used when d1 != 0
|
||||
|
||||
qadd16 r2, r4, r5 ; a1 + b1 + 7
|
||||
qsub16 r3, r4, r5 ; a1 - b1 + 7
|
||||
|
||||
ldr r12, c0x08a914e8 ; [2217 | 5352]
|
||||
|
||||
lsl r8, r2, #16 ; prepare bottom halfword for scaling
|
||||
asr r2, r2, #4 ; scale top halfword
|
||||
lsl r9, r3, #16 ; prepare bottom halfword for scaling
|
||||
asr r3, r3, #4 ; scale top halfword
|
||||
pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword
|
||||
pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword
|
||||
|
||||
smulbt r2, r6, r12 ; [ ------ | c1*2217]
|
||||
str r4, [r1, #0] ; [ o1 | o0]
|
||||
smultt r3, r6, r12 ; [c1*2217 | ------ ]
|
||||
str r5, [r1, #16] ; [ o9 | o8]
|
||||
|
||||
smlabb r8, r7, r12, r2 ; [ ------ | d1*5352]
|
||||
smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ]
|
||||
|
||||
smulbb r2, r6, r12 ; [ ------ | c1*5352]
|
||||
smultb r3, r6, r12 ; [c1*5352 | ------ ]
|
||||
|
||||
lsls r6, r7, #16 ; d1 (bottom halfword) != 0 ?
|
||||
addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0)
|
||||
addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0)
|
||||
asrs r6, r7, #16 ; d1 (top halfword) != 0 ?
|
||||
addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0)
|
||||
addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0)
|
||||
|
||||
smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000
|
||||
smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000
|
||||
|
||||
pkhtb r9, r9, r8, asr #16 ; [o5 | o4]: keep the upper 16 bits of each 32-bit sum
|
||||
|
||||
sub r4, r4, r2
|
||||
sub r5, r5, r3
|
||||
|
||||
ldr r3, [r1, #4] ; [i3 | i2]
|
||||
|
||||
pkhtb r5, r5, r4, asr #16 ; [o13|o12]
|
||||
|
||||
str r9, [r1, #8] ; [o5 | o4]
|
||||
|
||||
ldr r9, [r1, #12] ; [i7 | i6]
|
||||
ldr r8, [r1, #28] ; [i15|i14]
|
||||
ldr r2, [r1, #20] ; [i11|i10]
|
||||
str r5, [r1, #24] ; [o13|o12]
|
||||
|
||||
qadd16 r4, r3, r8 ; a1 = [i3+i15 | i2+i14]
|
||||
qadd16 r5, r9, r2 ; b1 = [i7+i11 | i6+i10]
|
||||
|
||||
qadd16 r4, r4, lr ; a1 + 7
|
||||
|
||||
qsub16 r6, r9, r2 ; c1 = [i7-i11 | i6-i10]
|
||||
qadd16 r2, r4, r5 ; a1 + b1 + 7
|
||||
qsub16 r7, r3, r8 ; d1 = [i3-i15 | i2-i14]
|
||||
qsub16 r3, r4, r5 ; a1 - b1 + 7
|
||||
|
||||
lsl r8, r2, #16 ; prepare bottom halfword for scaling
|
||||
asr r2, r2, #4 ; scale top halfword
|
||||
lsl r9, r3, #16 ; prepare bottom halfword for scaling
|
||||
asr r3, r3, #4 ; scale top halfword
|
||||
pkhtb r4, r2, r8, asr #20 ; pack and scale bottom halfword
|
||||
pkhtb r5, r3, r9, asr #20 ; pack and scale bottom halfword
|
||||
|
||||
smulbt r2, r6, r12 ; [ ------ | c1*2217]
|
||||
str r4, [r1, #4] ; [ o3 | o2]
|
||||
smultt r3, r6, r12 ; [c1*2217 | ------ ]
|
||||
str r5, [r1, #20] ; [ o11 | o10]
|
||||
|
||||
smlabb r8, r7, r12, r2 ; [ ------ | d1*5352]
|
||||
smlatb r9, r7, r12, r3 ; [d1*5352 | ------ ]
|
||||
|
||||
smulbb r2, r6, r12 ; [ ------ | c1*5352]
|
||||
smultb r3, r6, r12 ; [c1*5352 | ------ ]
|
||||
|
||||
lsls r6, r7, #16 ; d1 (bottom halfword) != 0 ?
|
||||
addeq r8, r8, r11 ; c1_b*2217+d1_b*5352+12000 + (d==0)
|
||||
addne r8, r8, r0 ; c1_b*2217+d1_b*5352+12000 + (d!=0)
|
||||
|
||||
asrs r6, r7, #16 ; d1 (top halfword) != 0 ?
|
||||
addeq r9, r9, r11 ; c1_t*2217+d1_t*5352+12000 + (d==0)
|
||||
addne r9, r9, r0 ; c1_t*2217+d1_t*5352+12000 + (d!=0)
|
||||
|
||||
smlabt r4, r7, r12, r10 ; [ ------ | d1*2217] + 51000
|
||||
smlatt r5, r7, r12, r10 ; [d1*2217 | ------ ] + 51000
|
||||
|
||||
pkhtb r9, r9, r8, asr #16 ; [o7 | o6]: keep the upper 16 bits of each 32-bit sum
|
||||
|
||||
sub r4, r4, r2
|
||||
sub r5, r5, r3
|
||||
|
||||
str r9, [r1, #12] ; [o7 | o6]
|
||||
pkhtb r5, r5, r4, asr #16 ; [o15|o14]
|
||||
|
||||
str r5, [r1, #28] ; [o15|o14]
|
||||
|
||||
ldmfd sp!, {r4 - r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; Used constants
|
||||
c7500 ; PART 1 rounding term added by smlsdx (o3, o7, o11, o15)
|
||||
DCD 7500
|
||||
c14500 ; PART 1 rounding term added by smlad (o1, o5, o9, o13)
|
||||
DCD 14500
|
||||
c0x22a453a0 ; packed [2217*4 | 5352*4] = [0x22a4 | 0x53a0]
|
||||
DCD 0x22a453a0
|
||||
c0x00080008 ; packed [8 | 8], the *8 multiplier for smuad/smusd in PART 1
|
||||
DCD 0x00080008
|
||||
c12000 ; PART 2 rounding term for the c1*2217 + d1*5352 outputs
|
||||
DCD 12000
|
||||
c51000 ; PART 2 rounding term for the d1*2217 - c1*5352 outputs
|
||||
DCD 51000
|
||||
c0x00070007 ; packed [7 | 7], rounding term added before the >>4 in PART 2
|
||||
DCD 0x00070007
|
||||
c0x08a914e8 ; packed [2217 | 5352] = [0x08a9 | 0x14e8]
|
||||
DCD 0x08a914e8
|
||||
|
||||
END
|
224
vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
Normal file
224
vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
Normal file
@@ -0,0 +1,224 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_fast_quantize_b_armv6|
|
||||
|
||||
INCLUDE asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 BLOCK *b
|
||||
; r1 BLOCKD *d
|
||||
; vp8_fast_quantize_b_armv6: quantize and dequantize the 16 coefficients of
; one block, then derive the end-of-block position.
;   r0 = BLOCK *b, r1 = BLOCKD *d (r1 is also pushed so PART 2 can reload it).
; PART 1 handles four coefficients per pass:
;   y = ((|z| + round) * quant_fast) >> 16, the sign of z is re-applied,
;   the result is stored to qcoeff and (result * dequant) to dqcoeff.
;   r1 collects one flag bit per coefficient pair:
;     0x01: qcoeff[0..1]    0x02: qcoeff[2..3]
;     0x04: qcoeff[4..5]    0x08: qcoeff[6..7]
;     0x10: qcoeff[8..9]    0x20: qcoeff[10..11]
;     0x40: qcoeff[12..13]  0x80: qcoeff[14..15]
; PART 2 walks qcoeff from the highest scan position i downwards (reading
;   the halfword at byte offset 2*rc) and stores i+1 for the first nonzero
;   coefficient in d->eob; eob is 0 when every coefficient quantized to zero.
|vp8_fast_quantize_b_armv6| PROC
|
||||
stmfd sp!, {r1, r4-r11, lr}
|
||||
|
||||
ldr r3, [r0, #vp8_block_coeff] ; coeff
|
||||
ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
|
||||
ldr r5, [r0, #vp8_block_round] ; round
|
||||
ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
|
||||
ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
|
||||
ldr r8, [r1, #vp8_blockd_dequant] ; dequant
|
||||
|
||||
ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
|
||||
; is used to update the counter so that
|
||||
; it can be used to mark nonzero
|
||||
; quantized coefficient pairs.
|
||||
|
||||
mov r1, #0 ; flags for quantized coeffs
|
||||
|
||||
; PART 1: quantization and dequantization loop
|
||||
loop
|
||||
ldr r9, [r3], #4 ; [z1 | z0]
|
||||
ldr r10, [r5], #4 ; [r1 | r0]
|
||||
ldr r11, [r4], #4 ; [q1 | q0]
|
||||
|
||||
ssat16 lr, #1, r9 ; [sz1 | sz0]: saturating to 1 bit gives 0 or -1 per halfword (sign mask)
|
||||
eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
|
||||
ssub16 r9, r9, lr ; x = (z ^ sz) - sz
|
||||
sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
|
||||
|
||||
ldr r12, [r3], #4 ; [z3 | z2]
|
||||
|
||||
smulbb r0, r9, r11 ; [(x0+r0)*q0]
|
||||
smultt r9, r9, r11 ; [(x1+r1)*q1]
|
||||
|
||||
ldr r10, [r5], #4 ; [r3 | r2]
|
||||
|
||||
ssat16 r11, #1, r12 ; [sz3 | sz2]
|
||||
eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
|
||||
pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
|
||||
ldr r9, [r4], #4 ; [q3 | q2]
|
||||
ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
|
||||
|
||||
sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
|
||||
|
||||
eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
|
||||
|
||||
smulbb r10, r12, r9 ; [(x2+r2)*q2]
|
||||
smultt r12, r12, r9 ; [(x3+r3)*q3]
|
||||
|
||||
ssub16 r0, r0, lr ; x = (y ^ sz) - sz
|
||||
|
||||
cmp r0, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs (first pair of this pass)
|
||||
|
||||
str r0, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r9, [r8], #4 ; [dq1 | dq0]
|
||||
|
||||
pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
|
||||
eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
|
||||
ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
|
||||
|
||||
cmp r10, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs (second pair of this pass)
|
||||
|
||||
str r10, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r11, [r8], #4 ; [dq3 | dq2]
|
||||
|
||||
smulbb r12, r0, r9 ; [x0*dq0]
|
||||
smultt r0, r0, r9 ; [x1*dq1]
|
||||
|
||||
smulbb r9, r10, r11 ; [x2*dq2]
|
||||
smultt r10, r10, r11 ; [x3*dq3]
|
||||
|
||||
lsls r2, r2, #2 ; update loop counter; becomes zero (Z set) after the fourth pass
|
||||
strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
|
||||
strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
|
||||
strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
|
||||
strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
|
||||
add r7, r7, #8 ; advance dqcoeff by 8 bytes (4 coefficients)
|
||||
bne loop
|
||||
|
||||
; PART 2: check position for eob...
|
||||
mov lr, #0 ; init eob
|
||||
cmp r1, #0 ; coeffs after quantization?
|
||||
ldr r11, [sp, #0] ; restore BLOCKD pointer
|
||||
beq end ; skip eob calculations if all zero
|
||||
|
||||
ldr r0, [r11, #vp8_blockd_qcoeff]
|
||||
|
||||
; check shortcut for nonzero qcoeffs
; Each test enters the scan at the highest position covered by that flag;
; the labelled sections fall through to lower positions until a nonzero
; coefficient is found.
|
||||
tst r1, #0x80 ; qcoeff[14..15]
|
||||
bne quant_coeff_15_14
|
||||
tst r1, #0x20 ; qcoeff[10..11]
|
||||
bne quant_coeff_13_11
|
||||
tst r1, #0x8 ; qcoeff[6..7]
|
||||
bne quant_coeff_12_7
|
||||
tst r1, #0x40 ; qcoeff[12..13]
|
||||
bne quant_coeff_10_9
|
||||
tst r1, #0x10 ; qcoeff[8..9]
|
||||
bne quant_coeff_8_3
|
||||
tst r1, #0x2 ; qcoeff[2..3]
|
||||
bne quant_coeff_6_5
|
||||
tst r1, #0x4 ; qcoeff[4..5]
|
||||
bne quant_coeff_4_2
|
||||
b quant_coeff_1_0
|
||||
|
||||
quant_coeff_15_14
|
||||
ldrh r2, [r0, #30] ; rc=15, i=15
|
||||
mov lr, #16
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #28] ; rc=14, i=14
|
||||
mov lr, #15
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_13_11
|
||||
ldrh r2, [r0, #22] ; rc=11, i=13
|
||||
mov lr, #14
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_12_7
|
||||
ldrh r3, [r0, #14] ; rc=7, i=12
|
||||
mov lr, #13
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #20] ; rc=10, i=11
|
||||
mov lr, #12
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_10_9
|
||||
ldrh r3, [r0, #26] ; rc=13, i=10
|
||||
mov lr, #11
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #24] ; rc=12, i=9
|
||||
mov lr, #10
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_8_3
|
||||
ldrh r3, [r0, #18] ; rc=9, i=8
|
||||
mov lr, #9
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #12] ; rc=6, i=7
|
||||
mov lr, #8
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_6_5
|
||||
ldrh r3, [r0, #6] ; rc=3, i=6
|
||||
mov lr, #7
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #4] ; rc=2, i=5
|
||||
mov lr, #6
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_4_2
|
||||
ldrh r3, [r0, #10] ; rc=5, i=4
|
||||
mov lr, #5
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #16] ; rc=8, i=3
|
||||
mov lr, #4
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #8] ; rc=4, i=2
|
||||
mov lr, #3
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_1_0
|
||||
ldrh r2, [r0, #2] ; rc=1, i=1
|
||||
mov lr, #2
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
mov lr, #1 ; rc=0, i=0 (not tested: r1 != 0 here, so it is the only candidate left)
|
||||
|
||||
end
|
||||
str lr, [r11, #vp8_blockd_eob]
|
||||
ldmfd sp!, {r1, r4-r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
loop_count ; initial counter: only bit 24 set; 'lsls #2' per pass clears it after four passes
|
||||
DCD 0x1000000
|
||||
|
||||
END
|
||||
|
133
vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
Normal file
133
vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
Normal file
@@ -0,0 +1,133 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_mse16x16_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
;
|
||||
;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
|
||||
; So, we can remove this part of calculation.
|
||||
|
||||
; vp8_mse16x16_armv6: sum of squared differences over a 16x16 block.
;   r0 = src_ptr, r1 = source_stride, r2 = ref_ptr, r3 = recon_stride,
;   [sp] on entry = unsigned int *sse.
; Each row is handled as four groups of 4 pixels. For every group the
; per-byte absolute difference is built from two usub8/sel pairs (positive
; part and negative part, merged with orr), widened to halfwords and squared
; and accumulated into r4 with smlad.
; The result is written to *sse and also returned in r0.
; The usad8 partial sums inherited from the variance routine were dropped:
; their results were overwritten before being read, as the sum is not needed.
|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}
    mov     r12, #16            ; set loop counter to 16 (=block height)

    mov     r4, #0              ; initialize sse = 0

loop
    ; 1st 4 pixels
    ldr     r5, [r0, #0x0]      ; load 4 src pixels
    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

    mov     lr, #0              ; constant zero

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels

    ldr     r5, [r0, #0x4]      ; load 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

    ; 2nd 4 pixels
    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels
    ldr     r5, [r0, #0x8]      ; load 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

    ; 3rd 4 pixels
    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels

    ldr     r5, [r0, #0xc]      ; load 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

    ; 4th 4 pixels
    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    usub8   r8, r5, r6          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r8, r9, lr          ; select bytes with negative difference

    orr     r8, r8, r7          ; differences of all 4 pixels

    subs    r12, r12, #1        ; next row (flags consumed by the bne below)

    ; calculate sse
    uxtb16  r6, r8              ; byte (two pixels) to halfwords
    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

    bne     loop

    ; return stuff
    ldr     r1, [sp, #28]       ; get address of sse (7 registers were pushed)
    mov     r0, r4              ; return sse
    str     r4, [r1]            ; store sse

    pop     {r4-r9, pc}

    ENDP
|
||||
|
||||
END
|
95
vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
Normal file
95
vp8/encoder/arm/armv6/vp8_variance8x8_armv6.asm
Normal file
@@ -0,0 +1,95 @@
|
||||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance8x8_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
; vp8_variance8x8_armv6: variance of an 8x8 block.
;   r0 = src_ptr, r1 = source_stride, r2 = ref_ptr, r3 = recon_stride,
;   [sp] on entry = unsigned int *sse.
; Each row is handled as two groups of 4 pixels. r4 accumulates the signed
; sum of differences, r5 the sum of squared differences.
; Stores sse to *sse and returns sse - ((sum * sum) >> 6) in r0.
|vp8_variance8x8_armv6| PROC
|
||||
|
||||
push {r4-r10, lr}
|
||||
mov r12, #8 ; set loop counter to 8 (=block height)
|
||||
mov r4, #0 ; initialize sum = 0
|
||||
mov r5, #0 ; initialize sse = 0
|
||||
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r6, [r0, #0x0] ; load 4 src pixels
|
||||
ldr r7, [r2, #0x0] ; load 4 ref pixels
|
||||
|
||||
mov lr, #0 ; constant zero
|
||||
|
||||
usub8 r8, r6, r7 ; calculate difference
|
||||
sel r10, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r7, r6 ; calculate difference with reversed operands
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r6, r10, lr ; calculate sum of positive differences
|
||||
usad8 r7, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r10 ; differences of all 4 pixels
|
||||
; calculate total sum
|
||||
add r4, r4, r6 ; add positive differences to sum
|
||||
sub r4, r4, r7 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r7, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r6, [r0, #0x4] ; load 4 src pixels
|
||||
ldr r7, [r2, #0x4] ; load 4 ref pixels
|
||||
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r8, r6, r7 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r10, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r7, r6 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set ref_ptr to next row
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r6, r10, lr ; calculate sum of positive differences
|
||||
usad8 r7, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r10 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r4, r4, r6 ; add positive differences to sum
|
||||
sub r4, r4, r7 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r7, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
|
||||
subs r12, r12, #1 ; next row (flags consumed by the bne below)
|
||||
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r8, [sp, #32] ; get address of sse (8 registers were pushed)
|
||||
mul r1, r4, r4 ; sum * sum
|
||||
str r5, [r8] ; store sse
|
||||
sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
24
vp8/encoder/arm/dct_arm.c
Normal file
24
vp8/encoder/arm/dct_arm.c
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/encoder/dct.h"
|
||||
|
||||
#if HAVE_ARMV6
|
||||
|
||||
/*
 * 8x4 forward transform built from two 4x4 transforms: block 0 reads from
 * input and writes 16 coefficients at output; block 1 reads from input + 4
 * and writes at output + 16. The pitch is passed through unchanged.
 */
void vp8_fast_fdct8x4_armv6(short *input, short *output, int pitch)
{
    int blk;

    for (blk = 0; blk < 2; blk++)
    {
        vp8_fast_fdct4x4_armv6(input + 4 * blk, output + 16 * blk, pitch);
    }
}
|
||||
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
|
@@ -14,12 +14,21 @@
|
||||
|
||||
#if HAVE_ARMV6
|
||||
extern prototype_fdct(vp8_short_walsh4x4_armv6);
|
||||
extern prototype_fdct(vp8_fast_fdct4x4_armv6);
|
||||
extern prototype_fdct(vp8_fast_fdct8x4_armv6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_fdct_walsh_short4x4
|
||||
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
|
||||
|
||||
#undef vp8_fdct_fast4x4
|
||||
#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_armv6
|
||||
|
||||
#undef vp8_fdct_fast8x4
|
||||
#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_armv6
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_fdct(vp8_short_fdct4x4_neon);
|
||||
|
@@ -12,6 +12,16 @@
|
||||
#ifndef QUANTIZE_ARM_H
|
||||
#define QUANTIZE_ARM_H
|
||||
|
||||
#if HAVE_ARMV6

extern prototype_quantize_block(vp8_fast_quantize_b_armv6);

/* Rebind the compile-time default only when runtime CPU detection is
 * disabled, matching the other ARM override headers. With detection
 * enabled the ARMv6 routine is installed through the rtcd table instead.
 */
#if !CONFIG_RUNTIME_CPU_DETECT
#undef  vp8_quantize_fastquantb
#define vp8_quantize_fastquantb vp8_fast_quantize_b_armv6
#endif

#endif /* HAVE_ARMV6 */
|
||||
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_quantize_block(vp8_fast_quantize_b_neon);
|
||||
|
||||
|
@@ -15,6 +15,34 @@
|
||||
|
||||
#if HAVE_ARMV6
|
||||
|
||||
unsigned int vp8_sub_pixel_variance8x8_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
unsigned short first_pass[10*8];
|
||||
unsigned char second_pass[8*8];
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass,
|
||||
src_pixels_per_line,
|
||||
9, 8, HFilter);
|
||||
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
|
||||
8, 8, 8, VFilter);
|
||||
|
||||
return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
|
||||
dst_pixels_per_line, sse);
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
|
@@ -16,10 +16,13 @@
|
||||
|
||||
extern prototype_sad(vp8_sad16x16_armv6);
|
||||
extern prototype_variance(vp8_variance16x16_armv6);
|
||||
extern prototype_variance(vp8_variance8x8_armv6);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_armv6);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
|
||||
extern prototype_variance(vp8_mse16x16_armv6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
@@ -29,9 +32,18 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
|
||||
#undef vp8_variance_subpixvar16x16
|
||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6
|
||||
|
||||
#undef vp8_variance_subpixvar8x8
|
||||
#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_armv6
|
||||
|
||||
#undef vp8_variance_var16x16
|
||||
#define vp8_variance_var16x16 vp8_variance16x16_armv6
|
||||
|
||||
#undef vp8_variance_mse16x16
|
||||
#define vp8_variance_mse16x16 vp8_mse16x16_armv6
|
||||
|
||||
#undef vp8_variance_var8x8
|
||||
#define vp8_variance_var8x8 vp8_variance8x8_armv6
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_h
|
||||
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
|
||||
|
||||
|
@@ -65,6 +65,17 @@ DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
|
||||
|
||||
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
// offsets from BLOCK structure
|
||||
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
|
||||
DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast));
|
||||
DEFINE(vp8_block_round, offsetof(BLOCK, round));
|
||||
|
||||
// offsets from BLOCKD structure
|
||||
DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
|
||||
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
|
||||
DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
|
||||
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
|
||||
|
||||
// These two sizes are used in vp8cx_pack_tokens. They are hard coded
|
||||
// so if the size changes this will have to be adjusted.
|
||||
#if HAVE_ARMV5TE
|
||||
|
@@ -1366,6 +1366,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
|
||||
oh.show_frame = (int) pc->show_frame;
|
||||
oh.type = (int)pc->frame_type;
|
||||
oh.version = pc->version;
|
||||
oh.first_partition_length_in_bytes = 0;
|
||||
|
||||
mb_feature_data_bits = vp8_mb_feature_data_bits;
|
||||
cx_data += 3;
|
||||
@@ -1634,6 +1635,21 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
|
||||
|
||||
vp8_stop_encode(bc);
|
||||
|
||||
oh.first_partition_length_in_bytes = cpi->bc.pos;
|
||||
|
||||
/* update frame tag */
|
||||
{
|
||||
int v = (oh.first_partition_length_in_bytes << 5) |
|
||||
(oh.show_frame << 4) |
|
||||
(oh.version << 1) |
|
||||
oh.type;
|
||||
|
||||
dest[0] = v;
|
||||
dest[1] = v >> 8;
|
||||
dest[2] = v >> 16;
|
||||
}
|
||||
|
||||
*size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc.pos;
|
||||
|
||||
if (pc->multi_token_partition != ONE_PARTITION)
|
||||
{
|
||||
@@ -1643,9 +1659,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
|
||||
|
||||
pack_tokens_into_partitions(cpi, cx_data + bc->pos, num_part, &asize);
|
||||
|
||||
oh.first_partition_length_in_bytes = cpi->bc.pos;
|
||||
|
||||
*size = cpi->bc.pos + VP8_HEADER_SIZE + asize + extra_bytes_packed;
|
||||
*size += asize;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1659,19 +1673,8 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
|
||||
pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count);
|
||||
|
||||
vp8_stop_encode(&cpi->bc2);
|
||||
oh.first_partition_length_in_bytes = cpi->bc.pos ;
|
||||
*size = cpi->bc2.pos + cpi->bc.pos + VP8_HEADER_SIZE + extra_bytes_packed;
|
||||
}
|
||||
|
||||
{
|
||||
int v = (oh.first_partition_length_in_bytes << 5) |
|
||||
(oh.show_frame << 4) |
|
||||
(oh.version << 1) |
|
||||
oh.type;
|
||||
|
||||
dest[0] = v;
|
||||
dest[1] = v >> 8;
|
||||
dest[2] = v >> 16;
|
||||
*size += cpi->bc2.pos;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -808,7 +808,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
|
||||
|
||||
for (i = 0; i < cm->mb_rows; i++)
|
||||
cpi->mt_current_mb_col[i] = 0;
|
||||
cpi->mt_current_mb_col[i] = -1;
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
@@ -1184,7 +1184,8 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
|
||||
int distortion2;
|
||||
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
|
||||
rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
|
||||
this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
|
||||
|
@@ -25,19 +25,6 @@
|
||||
#define intra4x4pbias_rate 256
|
||||
|
||||
|
||||
void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode)
|
||||
{
|
||||
if (i < 12)
|
||||
{
|
||||
abmode[i+4] = best_mode;
|
||||
}
|
||||
|
||||
if ((i & 3) != 3)
|
||||
{
|
||||
lbmode[i+1] = best_mode;
|
||||
}
|
||||
|
||||
}
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#define IF_RTCD(x) (x)
|
||||
#else
|
||||
@@ -80,7 +67,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
int b;
|
||||
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
|
||||
|
||||
|
@@ -17,7 +17,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
|
||||
void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
|
||||
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *, MACROBLOCK *mb);
|
||||
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
|
||||
void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode);
|
||||
void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
|
||||
|
||||
#endif
|
||||
|
@@ -104,7 +104,7 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
|
||||
}
|
||||
|
||||
void vp8_build_dcblock(MACROBLOCK *x)
|
||||
static void build_dcblock(MACROBLOCK *x)
|
||||
{
|
||||
short *src_diff_ptr = &x->src_diff[384];
|
||||
int i;
|
||||
@@ -138,7 +138,7 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
|
||||
}
|
||||
|
||||
// build dc block from 16 y dc values
|
||||
vp8_build_dcblock(x);
|
||||
build_dcblock(x);
|
||||
|
||||
// do 2nd order transform on the dc block
|
||||
x->short_walsh4x4(&x->block[24].src_diff[0],
|
||||
@@ -147,7 +147,7 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
|
||||
}
|
||||
|
||||
|
||||
void vp8_transform_mb(MACROBLOCK *x)
|
||||
static void transform_mb(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -159,7 +159,7 @@ void vp8_transform_mb(MACROBLOCK *x)
|
||||
|
||||
// build dc block from 16 y dc values
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
|
||||
vp8_build_dcblock(x);
|
||||
build_dcblock(x);
|
||||
|
||||
for (i = 16; i < 24; i += 2)
|
||||
{
|
||||
@@ -174,7 +174,8 @@ void vp8_transform_mb(MACROBLOCK *x)
|
||||
|
||||
}
|
||||
|
||||
void vp8_transform_mby(MACROBLOCK *x)
|
||||
|
||||
static void transform_mby(MACROBLOCK *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -187,7 +188,7 @@ void vp8_transform_mby(MACROBLOCK *x)
|
||||
// build dc block from 16 y dc values
|
||||
if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
vp8_build_dcblock(x);
|
||||
build_dcblock(x);
|
||||
x->short_walsh4x4(&x->block[24].src_diff[0],
|
||||
&x->block[24].coeff[0], 8);
|
||||
}
|
||||
@@ -255,9 +256,9 @@ static const int plane_rd_mult[4]=
|
||||
Y1_RD_MULT
|
||||
};
|
||||
|
||||
void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
|
||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
|
||||
const VP8_ENCODER_RTCD *rtcd)
|
||||
static void optimize_b(MACROBLOCK *mb, int ib, int type,
|
||||
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
|
||||
const VP8_ENCODER_RTCD *rtcd)
|
||||
{
|
||||
BLOCK *b;
|
||||
BLOCKD *d;
|
||||
@@ -501,7 +502,7 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
|
||||
*a = *l = (d->eob != !type);
|
||||
}
|
||||
|
||||
void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
|
||||
static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
|
||||
{
|
||||
int b;
|
||||
int type;
|
||||
@@ -522,20 +523,20 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
|
||||
|
||||
for (b = 0; b < 16; b++)
|
||||
{
|
||||
vp8_optimize_b(x, b, type,
|
||||
optimize_b(x, b, type,
|
||||
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
|
||||
}
|
||||
|
||||
for (b = 16; b < 24; b++)
|
||||
{
|
||||
vp8_optimize_b(x, b, PLANE_TYPE_UV,
|
||||
optimize_b(x, b, PLANE_TYPE_UV,
|
||||
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
|
||||
}
|
||||
|
||||
if (has_2nd_order)
|
||||
{
|
||||
b=24;
|
||||
vp8_optimize_b(x, b, PLANE_TYPE_Y2,
|
||||
optimize_b(x, b, PLANE_TYPE_Y2,
|
||||
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
|
||||
}
|
||||
}
|
||||
@@ -569,7 +570,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
|
||||
|
||||
for (b = 0; b < 16; b++)
|
||||
{
|
||||
vp8_optimize_b(x, b, type,
|
||||
optimize_b(x, b, type,
|
||||
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
|
||||
}
|
||||
|
||||
@@ -577,7 +578,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
|
||||
if (has_2nd_order)
|
||||
{
|
||||
b=24;
|
||||
vp8_optimize_b(x, b, PLANE_TYPE_Y2,
|
||||
optimize_b(x, b, PLANE_TYPE_Y2,
|
||||
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
|
||||
}
|
||||
}
|
||||
@@ -603,7 +604,7 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
|
||||
|
||||
for (b = 16; b < 24; b++)
|
||||
{
|
||||
vp8_optimize_b(x, b, PLANE_TYPE_UV,
|
||||
optimize_b(x, b, PLANE_TYPE_UV,
|
||||
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
|
||||
}
|
||||
}
|
||||
@@ -615,13 +616,13 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
|
||||
vp8_subtract_mb(rtcd, x);
|
||||
|
||||
vp8_transform_mb(x);
|
||||
transform_mb(x);
|
||||
|
||||
vp8_quantize_mb(x);
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
if (x->optimize)
|
||||
vp8_optimize_mb(x, rtcd);
|
||||
optimize_mb(x, rtcd);
|
||||
#endif
|
||||
|
||||
vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
|
||||
@@ -638,7 +639,7 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
|
||||
|
||||
vp8_transform_mby(x);
|
||||
transform_mby(x);
|
||||
|
||||
vp8_quantize_mby(x);
|
||||
|
||||
@@ -649,22 +650,6 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
}
|
||||
|
||||
|
||||
void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
vp8_build_inter_predictors_mbuv(&x->e_mbd);
|
||||
|
||||
ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
|
||||
|
||||
vp8_transform_mbuv(x);
|
||||
|
||||
vp8_quantize_mbuv(x);
|
||||
|
||||
vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
|
||||
|
||||
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
|
||||
}
|
||||
|
||||
|
||||
void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
{
|
||||
vp8_build_inter_predictors_mbuv(&x->e_mbd);
|
||||
|
@@ -101,9 +101,6 @@ void vp8_build_dcblock(MACROBLOCK *b);
|
||||
void vp8_transform_mb(MACROBLOCK *mb);
|
||||
void vp8_transform_mbuv(MACROBLOCK *x);
|
||||
void vp8_transform_intra_mby(MACROBLOCK *x);
|
||||
void Encode16x16Y(MACROBLOCK *x);
|
||||
void Encode16x16UV(MACROBLOCK *x);
|
||||
void vp8_encode_inter16x16uv(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
|
||||
void vp8_encode_inter16x16uvrd(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
|
||||
void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
|
||||
void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd);
|
||||
|
@@ -24,6 +24,32 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
extern void vp8_build_block_offsets(MACROBLOCK *x);
|
||||
extern void vp8_setup_block_ptrs(MACROBLOCK *x);
|
||||
|
||||
extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
|
||||
|
||||
static THREAD_FUNCTION loopfilter_thread(void *p_data)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (cpi->b_multi_threaded == 0)
|
||||
break;
|
||||
|
||||
if (sem_wait(&cpi->h_event_start_lpf) == 0)
|
||||
{
|
||||
if (cpi->b_multi_threaded == FALSE) // we're shutting down
|
||||
break;
|
||||
|
||||
loopfilter_frame(cpi, cm);
|
||||
|
||||
sem_post(&cpi->h_event_end_lpf);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
{
|
||||
@@ -429,56 +455,70 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
|
||||
void vp8cx_create_encoder_threads(VP8_COMP *cpi)
|
||||
{
|
||||
cpi->b_multi_threaded = 0;
|
||||
const VP8_COMMON * cm = &cpi->common;
|
||||
|
||||
cpi->b_multi_threaded = 0;
|
||||
cpi->encoding_thread_count = 0;
|
||||
cpi->processor_core_count = 32; //vp8_get_proc_core_count();
|
||||
|
||||
if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
|
||||
{
|
||||
int ithread;
|
||||
int th_count = cpi->oxcf.multi_threaded - 1;
|
||||
|
||||
if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
|
||||
cpi->encoding_thread_count = cpi->processor_core_count - 1;
|
||||
else
|
||||
cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1;
|
||||
th_count = cpi->processor_core_count - 1;
|
||||
|
||||
CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count));
|
||||
vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
|
||||
CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * cpi->encoding_thread_count));
|
||||
CHECK_MEM_ERROR(cpi->mt_current_mb_col, vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cpi->common.mb_rows));
|
||||
/* we have th_count + 1 (main) threads processing one row each */
|
||||
/* no point to have more threads than the sync range allows */
|
||||
if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1))
|
||||
{
|
||||
th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
|
||||
}
|
||||
|
||||
if(th_count == 0)
|
||||
return;
|
||||
|
||||
CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count));
|
||||
CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count));
|
||||
CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
|
||||
vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
|
||||
CHECK_MEM_ERROR(cpi->en_thread_data,
|
||||
vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
|
||||
CHECK_MEM_ERROR(cpi->mt_current_mb_col,
|
||||
vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
|
||||
|
||||
//cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
sem_init(&cpi->h_event_end_encoding, 0, 0);
|
||||
|
||||
cpi->b_multi_threaded = 1;
|
||||
cpi->encoding_thread_count = th_count;
|
||||
|
||||
//printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", (cpi->encoding_thread_count +1));
|
||||
/*
|
||||
printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
|
||||
(cpi->encoding_thread_count +1));
|
||||
*/
|
||||
|
||||
for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++)
|
||||
for (ithread = 0; ithread < th_count; ithread++)
|
||||
{
|
||||
ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread];
|
||||
|
||||
//cpi->h_event_mbrencoding[ithread] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
|
||||
ethd->ithread = ithread;
|
||||
ethd->ptr1 = (void *)cpi;
|
||||
ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];
|
||||
|
||||
//printf(" call begin thread %d \n", ithread);
|
||||
|
||||
//cpi->h_encoding_thread[ithread] = (HANDLE)_beginthreadex(
|
||||
// NULL, // security
|
||||
// 0, // stksize
|
||||
// thread_encoding_proc,
|
||||
// (&cpi->en_thread_data[ithread]), // Thread data
|
||||
// 0,
|
||||
// NULL);
|
||||
|
||||
pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
|
||||
}
|
||||
|
||||
{
|
||||
LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;
|
||||
|
||||
sem_init(&cpi->h_event_start_lpf, 0, 0);
|
||||
sem_init(&cpi->h_event_end_lpf, 0, 0);
|
||||
|
||||
lpfthd->ptr1 = (void *)cpi;
|
||||
pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -500,9 +540,14 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
|
||||
|
||||
sem_destroy(&cpi->h_event_start_encoding[i]);
|
||||
}
|
||||
|
||||
sem_post(&cpi->h_event_start_lpf);
|
||||
pthread_join(cpi->h_filter_thread, 0);
|
||||
}
|
||||
|
||||
sem_destroy(&cpi->h_event_end_encoding);
|
||||
sem_destroy(&cpi->h_event_end_lpf);
|
||||
sem_destroy(&cpi->h_event_start_lpf);
|
||||
|
||||
//free thread related resources
|
||||
vpx_free(cpi->h_event_start_encoding);
|
||||
|
@@ -67,7 +67,7 @@ static int vscale_lookup[7] = {0, 1, 1, 2, 2, 3, 3};
|
||||
static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3};
|
||||
|
||||
|
||||
const int cq_level[QINDEX_RANGE] =
|
||||
static const int cq_level[QINDEX_RANGE] =
|
||||
{
|
||||
0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9,
|
||||
9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20,
|
||||
@@ -79,10 +79,9 @@ const int cq_level[QINDEX_RANGE] =
|
||||
86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
|
||||
};
|
||||
|
||||
void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
|
||||
int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps);
|
||||
static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
|
||||
|
||||
int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
|
||||
static int encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
|
||||
{
|
||||
|
||||
int i;
|
||||
@@ -146,7 +145,7 @@ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
/*start_pos = cpi->stats_in;
|
||||
sum_iiratio = 0.0;
|
||||
i = 0;
|
||||
while ( (i < 1) && vp8_input_stats(cpi,&next_frame) != EOF )
|
||||
while ( (i < 1) && input_stats(cpi,&next_frame) != EOF )
|
||||
{
|
||||
|
||||
next_iiratio = next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
|
||||
@@ -212,7 +211,7 @@ static const double weight_table[256] = {
|
||||
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000
|
||||
};
|
||||
|
||||
double vp8_simple_weight(YV12_BUFFER_CONFIG *source)
|
||||
static double simple_weight(YV12_BUFFER_CONFIG *source)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
@@ -240,7 +239,7 @@ double vp8_simple_weight(YV12_BUFFER_CONFIG *source)
|
||||
|
||||
|
||||
// This function returns the current per frame maximum bitrate target
|
||||
int frame_max_bits(VP8_COMP *cpi)
|
||||
static int frame_max_bits(VP8_COMP *cpi)
|
||||
{
|
||||
// Max allocation for a single frame based on the max section guidelines passed in and how many bits are left
|
||||
int max_bits;
|
||||
@@ -281,38 +280,26 @@ int frame_max_bits(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
|
||||
extern size_t vp8_firstpass_stats_sz(unsigned int mb_count)
|
||||
{
|
||||
/* Calculate the size of a stats packet, which is dependent on the frame
|
||||
* resolution. The FIRSTPASS_STATS struct has a single element array,
|
||||
* motion_map, which is virtually expanded to have one element per
|
||||
* macroblock.
|
||||
*/
|
||||
size_t stats_sz;
|
||||
|
||||
stats_sz = sizeof(FIRSTPASS_STATS) + mb_count;
|
||||
stats_sz = (stats_sz + 7) & ~7;
|
||||
return stats_sz;
|
||||
}
|
||||
|
||||
|
||||
void vp8_output_stats(const VP8_COMP *cpi,
|
||||
struct vpx_codec_pkt_list *pktlist,
|
||||
FIRSTPASS_STATS *stats)
|
||||
static void output_stats(const VP8_COMP *cpi,
|
||||
struct vpx_codec_pkt_list *pktlist,
|
||||
FIRSTPASS_STATS *stats)
|
||||
{
|
||||
struct vpx_codec_cx_pkt pkt;
|
||||
pkt.kind = VPX_CODEC_STATS_PKT;
|
||||
pkt.data.twopass_stats.buf = stats;
|
||||
pkt.data.twopass_stats.sz = vp8_firstpass_stats_sz(cpi->common.MBs);
|
||||
pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
|
||||
vpx_codec_pkt_list_add(pktlist, &pkt);
|
||||
|
||||
// TEMP debug code
|
||||
#if OUTPUT_FPF
|
||||
|
||||
{
|
||||
FILE *fpfile;
|
||||
fpfile = fopen("firstpass.stt", "a");
|
||||
|
||||
fprintf(fpfile, "%12.0f %12.0f %12.0f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.0f\n",
|
||||
fprintf(fpfile, "%12.0f %12.0f %12.0f %12.4f %12.4f %12.4f %12.4f"
|
||||
" %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f"
|
||||
" %12.0f %12.4f\n",
|
||||
stats->frame,
|
||||
stats->intra_error,
|
||||
stats->coded_error,
|
||||
@@ -320,6 +307,7 @@ void vp8_output_stats(const VP8_COMP *cpi,
|
||||
stats->pcnt_inter,
|
||||
stats->pcnt_motion,
|
||||
stats->pcnt_second_ref,
|
||||
stats->pcnt_neutral,
|
||||
stats->MVr,
|
||||
stats->mvr_abs,
|
||||
stats->MVc,
|
||||
@@ -327,30 +315,24 @@ void vp8_output_stats(const VP8_COMP *cpi,
|
||||
stats->MVrv,
|
||||
stats->MVcv,
|
||||
stats->mv_in_out_count,
|
||||
stats->count);
|
||||
fclose(fpfile);
|
||||
|
||||
|
||||
fpfile = fopen("fpmotionmap.stt", "a");
|
||||
if(fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile));
|
||||
stats->count,
|
||||
stats->duration);
|
||||
fclose(fpfile);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
|
||||
static int input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
|
||||
{
|
||||
size_t stats_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
|
||||
|
||||
if (cpi->stats_in >= cpi->stats_in_end)
|
||||
return EOF;
|
||||
|
||||
*fps = *cpi->stats_in;
|
||||
cpi->stats_in = (void*)((char *)cpi->stats_in + stats_sz);
|
||||
cpi->stats_in = (void*)((char *)cpi->stats_in + sizeof(FIRSTPASS_STATS));
|
||||
return 1;
|
||||
}
|
||||
|
||||
void vp8_zero_stats(FIRSTPASS_STATS *section)
|
||||
static void zero_stats(FIRSTPASS_STATS *section)
|
||||
{
|
||||
section->frame = 0.0;
|
||||
section->intra_error = 0.0;
|
||||
@@ -359,6 +341,7 @@ void vp8_zero_stats(FIRSTPASS_STATS *section)
|
||||
section->pcnt_inter = 0.0;
|
||||
section->pcnt_motion = 0.0;
|
||||
section->pcnt_second_ref = 0.0;
|
||||
section->pcnt_neutral = 0.0;
|
||||
section->MVr = 0.0;
|
||||
section->mvr_abs = 0.0;
|
||||
section->MVc = 0.0;
|
||||
@@ -369,7 +352,7 @@ void vp8_zero_stats(FIRSTPASS_STATS *section)
|
||||
section->count = 0.0;
|
||||
section->duration = 1.0;
|
||||
}
|
||||
void vp8_accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
|
||||
static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
|
||||
{
|
||||
section->frame += frame->frame;
|
||||
section->intra_error += frame->intra_error;
|
||||
@@ -378,6 +361,7 @@ void vp8_accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
|
||||
section->pcnt_inter += frame->pcnt_inter;
|
||||
section->pcnt_motion += frame->pcnt_motion;
|
||||
section->pcnt_second_ref += frame->pcnt_second_ref;
|
||||
section->pcnt_neutral += frame->pcnt_neutral;
|
||||
section->MVr += frame->MVr;
|
||||
section->mvr_abs += frame->mvr_abs;
|
||||
section->MVc += frame->MVc;
|
||||
@@ -388,7 +372,7 @@ void vp8_accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame)
|
||||
section->count += frame->count;
|
||||
section->duration += frame->duration;
|
||||
}
|
||||
void vp8_avg_stats(FIRSTPASS_STATS *section)
|
||||
static void avg_stats(FIRSTPASS_STATS *section)
|
||||
{
|
||||
if (section->count < 1.0)
|
||||
return;
|
||||
@@ -398,6 +382,7 @@ void vp8_avg_stats(FIRSTPASS_STATS *section)
|
||||
section->ssim_weighted_pred_err /= section->count;
|
||||
section->pcnt_inter /= section->count;
|
||||
section->pcnt_second_ref /= section->count;
|
||||
section->pcnt_neutral /= section->count;
|
||||
section->pcnt_motion /= section->count;
|
||||
section->MVr /= section->count;
|
||||
section->mvr_abs /= section->count;
|
||||
@@ -409,65 +394,17 @@ void vp8_avg_stats(FIRSTPASS_STATS *section)
|
||||
section->duration /= section->count;
|
||||
}
|
||||
|
||||
unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi)
|
||||
{
|
||||
return cpi->fp_motion_map_stats;
|
||||
}
|
||||
void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos)
|
||||
{
|
||||
cpi->fp_motion_map_stats = target_pos;
|
||||
}
|
||||
|
||||
void vp8_advance_fpmm(VP8_COMP *cpi, int count)
|
||||
{
|
||||
cpi->fp_motion_map_stats = (void*)((char*)cpi->fp_motion_map_stats +
|
||||
count * vp8_firstpass_stats_sz(cpi->common.MBs));
|
||||
}
|
||||
|
||||
void vp8_input_fpmm(VP8_COMP *cpi)
|
||||
{
|
||||
unsigned char *fpmm = cpi->fp_motion_map;
|
||||
int MBs = cpi->common.MBs;
|
||||
int max_frames = cpi->active_arnr_frames;
|
||||
int i;
|
||||
|
||||
for (i=0; i<max_frames; i++)
|
||||
{
|
||||
char *motion_map = (char*)cpi->fp_motion_map_stats
|
||||
+ sizeof(FIRSTPASS_STATS);
|
||||
|
||||
memcpy(fpmm, motion_map, MBs);
|
||||
fpmm += MBs;
|
||||
vp8_advance_fpmm(cpi, 1);
|
||||
}
|
||||
|
||||
// Flag the use of weights in the temporal filter
|
||||
cpi->use_weighted_temporal_filter = 1;
|
||||
}
|
||||
|
||||
void vp8_init_first_pass(VP8_COMP *cpi)
|
||||
{
|
||||
vp8_zero_stats(cpi->total_stats);
|
||||
|
||||
// TEMP debug code
|
||||
#ifdef OUTPUT_FPF
|
||||
{
|
||||
FILE *fpfile;
|
||||
fpfile = fopen("firstpass.stt", "w");
|
||||
fclose(fpfile);
|
||||
fpfile = fopen("fpmotionmap.stt", "wb");
|
||||
fclose(fpfile);
|
||||
}
|
||||
#endif
|
||||
|
||||
zero_stats(cpi->total_stats);
|
||||
}
|
||||
|
||||
void vp8_end_first_pass(VP8_COMP *cpi)
|
||||
{
|
||||
vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
|
||||
output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
|
||||
}
|
||||
|
||||
void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
|
||||
static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
|
||||
{
|
||||
MACROBLOCKD * const xd = & x->e_mbd;
|
||||
BLOCK *b = &x->block[0];
|
||||
@@ -486,7 +423,7 @@ void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * r
|
||||
VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
|
||||
}
|
||||
|
||||
void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset )
|
||||
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset )
|
||||
{
|
||||
MACROBLOCKD *const xd = & x->e_mbd;
|
||||
BLOCK *b = &x->block[0];
|
||||
@@ -570,13 +507,12 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
int intercount = 0;
|
||||
int second_ref_count = 0;
|
||||
int intrapenalty = 256;
|
||||
int neutral_count = 0;
|
||||
|
||||
int sum_in_vectors = 0;
|
||||
|
||||
MV zero_ref_mv = {0, 0};
|
||||
|
||||
unsigned char *fp_motion_map_ptr = cpi->fp_motion_map;
|
||||
|
||||
vp8_clear_system_state(); //__asm emms;
|
||||
|
||||
x->src = * cpi->Source;
|
||||
@@ -628,7 +564,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int this_error;
|
||||
int zero_error;
|
||||
int zz_to_best_ratio;
|
||||
int gf_motion_error = INT_MAX;
|
||||
int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
|
||||
@@ -639,7 +574,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
// do intra 16x16 prediction
|
||||
this_error = vp8_encode_intra(cpi, x, use_dc_pred);
|
||||
this_error = encode_intra(cpi, x, use_dc_pred);
|
||||
|
||||
// "intrapenalty" below deals with situations where the intra and inter error scores are very low (eg a plain black frame)
|
||||
// We do not have special cases in first pass for 0,0 and nearest etc so all inter modes carry an overhead cost estimate fot the mv.
|
||||
@@ -650,9 +585,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
// Cumulative intra error total
|
||||
intra_error += (long long)this_error;
|
||||
|
||||
// Indicate default assumption of intra in the motion map
|
||||
*fp_motion_map_ptr = 0;
|
||||
|
||||
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
|
||||
@@ -667,16 +599,13 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
int motion_error = INT_MAX;
|
||||
|
||||
// Simple 0,0 motion with no mv overhead
|
||||
vp8_zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
|
||||
zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
|
||||
d->bmi.mv.as_mv.row = 0;
|
||||
d->bmi.mv.as_mv.col = 0;
|
||||
|
||||
// Save (0,0) error for later use
|
||||
zero_error = motion_error;
|
||||
|
||||
// Test last reference frame using the previous best mv as the
|
||||
// starting point (best reference) for the search
|
||||
vp8_first_pass_motion_search(cpi, x, &best_ref_mv.as_mv,
|
||||
first_pass_motion_search(cpi, x, &best_ref_mv.as_mv,
|
||||
&d->bmi.mv.as_mv, lst_yv12,
|
||||
&motion_error, recon_yoffset);
|
||||
|
||||
@@ -684,7 +613,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
if (best_ref_mv.as_int)
|
||||
{
|
||||
tmp_err = INT_MAX;
|
||||
vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
|
||||
first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
|
||||
lst_yv12, &tmp_err, recon_yoffset);
|
||||
|
||||
if ( tmp_err < motion_error )
|
||||
@@ -698,7 +627,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
// Experimental search in a second reference frame ((0,0) based only)
|
||||
if (cm->current_video_frame > 1)
|
||||
{
|
||||
vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
|
||||
first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
|
||||
|
||||
if ((gf_motion_error < motion_error) && (gf_motion_error < this_error))
|
||||
{
|
||||
@@ -726,6 +655,17 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
|
||||
if (motion_error <= this_error)
|
||||
{
|
||||
// Keep a count of cases where the inter and intra were
|
||||
// very close and very low. This helps with scene cut
|
||||
// detection for example in cropped clips with black bars
|
||||
// at the sides or top and bottom.
|
||||
if( (((this_error-intrapenalty) * 9) <=
|
||||
(motion_error*10)) &&
|
||||
(this_error < (2*intrapenalty)) )
|
||||
{
|
||||
neutral_count++;
|
||||
}
|
||||
|
||||
d->bmi.mv.as_mv.row <<= 3;
|
||||
d->bmi.mv.as_mv.col <<= 3;
|
||||
this_error = motion_error;
|
||||
@@ -777,25 +717,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
else if (d->bmi.mv.as_mv.col < 0)
|
||||
sum_in_vectors--;
|
||||
}
|
||||
|
||||
// Compute how close (0,0) predictor is to best
|
||||
// predictor in terms of their prediction error
|
||||
zz_to_best_ratio = (10*zero_error + this_error/2)
|
||||
/ (this_error+!this_error);
|
||||
|
||||
if ((zero_error < 50000) &&
|
||||
(zz_to_best_ratio <= 11) )
|
||||
*fp_motion_map_ptr = 1;
|
||||
else
|
||||
*fp_motion_map_ptr = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 0,0 mv was best
|
||||
if( zero_error<50000 )
|
||||
*fp_motion_map_ptr = 2;
|
||||
else
|
||||
*fp_motion_map_ptr = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -809,9 +730,6 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
|
||||
recon_yoffset += 16;
|
||||
recon_uvoffset += 8;
|
||||
|
||||
// Update the motion map
|
||||
fp_motion_map_ptr++;
|
||||
}
|
||||
|
||||
// adjust to the next row of mbs
|
||||
@@ -833,7 +751,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
fps.frame = cm->current_video_frame ;
|
||||
fps.intra_error = intra_error >> 8;
|
||||
fps.coded_error = coded_error >> 8;
|
||||
weight = vp8_simple_weight(cpi->Source);
|
||||
weight = simple_weight(cpi->Source);
|
||||
|
||||
|
||||
if (weight < 0.1)
|
||||
@@ -854,6 +772,7 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
|
||||
fps.pcnt_inter = 1.0 * (double)intercount / cm->MBs;
|
||||
fps.pcnt_second_ref = 1.0 * (double)second_ref_count / cm->MBs;
|
||||
fps.pcnt_neutral = 1.0 * (double)neutral_count / cm->MBs;
|
||||
|
||||
if (mvcount > 0)
|
||||
{
|
||||
@@ -872,15 +791,12 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
// than the full time between subsequent cpi->source_time_stamp s .
|
||||
fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp;
|
||||
|
||||
// don't want to do outputstats with a stack variable!
|
||||
// don't want to do output stats with a stack variable!
|
||||
memcpy(cpi->this_frame_stats,
|
||||
&fps,
|
||||
sizeof(FIRSTPASS_STATS));
|
||||
memcpy((char*)cpi->this_frame_stats + sizeof(FIRSTPASS_STATS),
|
||||
cpi->fp_motion_map,
|
||||
sizeof(cpi->fp_motion_map[0]) * cpi->common.MBs);
|
||||
vp8_output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
|
||||
vp8_accumulate_stats(cpi->total_stats, &fps);
|
||||
output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
|
||||
accumulate_stats(cpi->total_stats, &fps);
|
||||
}
|
||||
|
||||
// Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
|
||||
@@ -924,10 +840,10 @@ void vp8_first_pass(VP8_COMP *cpi)
|
||||
extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
|
||||
|
||||
#define BASE_ERRPERMB 150
|
||||
static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width)
|
||||
static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
@@ -1024,10 +940,10 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
|
||||
|
||||
return Q;
|
||||
}
|
||||
static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width)
|
||||
static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
@@ -1075,10 +991,10 @@ static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_band
|
||||
}
|
||||
|
||||
// Estimate a worst case Q for a KF group
|
||||
static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width, double group_iiratio)
|
||||
static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb = (512 * section_target_bandwitdh) / num_mbs;
|
||||
int bits_per_mb_at_this_q;
|
||||
|
||||
@@ -1173,11 +1089,10 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
|
||||
|
||||
// For cq mode estimate a cq level that matches the observed
|
||||
// complexity and data rate.
|
||||
static int estimate_cq(VP8_COMP *cpi, double section_err,
|
||||
int section_target_bandwitdh, int Height, int Width)
|
||||
static int estimate_cq(VP8_COMP *cpi, double section_err, int section_target_bandwitdh)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int num_mbs = cpi->common.MBs;
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
@@ -1252,7 +1167,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
|
||||
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
|
||||
|
||||
vp8_zero_stats(cpi->total_stats);
|
||||
zero_stats(cpi->total_stats);
|
||||
|
||||
if (!cpi->stats_in_end)
|
||||
return;
|
||||
@@ -1286,7 +1201,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
cpi->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
|
||||
cpi->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
|
||||
|
||||
vp8_avg_stats(cpi->total_stats);
|
||||
avg_stats(cpi->total_stats);
|
||||
|
||||
// Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
|
||||
{
|
||||
@@ -1295,7 +1210,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
|
||||
start_pos = cpi->stats_in; // Note starting "file" position
|
||||
|
||||
while (vp8_input_stats(cpi, &this_frame) != EOF)
|
||||
while (input_stats(cpi, &this_frame) != EOF)
|
||||
{
|
||||
IIRatio = this_frame.intra_error / DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
|
||||
IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 20.0 : IIRatio;
|
||||
@@ -1316,7 +1231,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
cpi->modified_error_total = 0.0;
|
||||
cpi->modified_error_used = 0.0;
|
||||
|
||||
while (vp8_input_stats(cpi, &this_frame) != EOF)
|
||||
while (input_stats(cpi, &this_frame) != EOF)
|
||||
{
|
||||
cpi->modified_error_total += calculate_modified_err(cpi, &this_frame);
|
||||
}
|
||||
@@ -1331,8 +1246,6 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
cpi->clip_bpe = cpi->bits_left /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_total);
|
||||
cpi->observed_bpe = cpi->clip_bpe;
|
||||
|
||||
cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
|
||||
}
|
||||
|
||||
void vp8_end_second_pass(VP8_COMP *cpi)
|
||||
@@ -1340,8 +1253,8 @@ void vp8_end_second_pass(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
// This function gives an estimate of how badly we believe
|
||||
// the predicition quality is decaying from frame to frame.
|
||||
double gf_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
|
||||
// the prediction quality is decaying from frame to frame.
|
||||
static double get_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
|
||||
{
|
||||
double prediction_decay_rate;
|
||||
double motion_decay;
|
||||
@@ -1376,6 +1289,52 @@ double gf_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
|
||||
return prediction_decay_rate;
|
||||
}
|
||||
|
||||
// Function to test for a condition where a complex transition is followed
|
||||
// by a static section. For example in slide shows where there is a fade
|
||||
// between slides. This is to help with more optimal kf and gf positioning.
|
||||
// Inputs, as tested by the body below:
//   cpi               - encoder context; cpi->stats_in (the first pass stats
//                       read position) is saved before the look-ahead and
//                       restored afterwards.
//   frame_interval    - must be greater than MIN_GF_INTERVAL.
//   still_interval    - number of look-ahead frames that must all report a
//                       decay rate of at least 0.999.
//   loop_decay_rate   - must be >= 0.999.
//   decay_accumulator - must be < 0.9.
// Returns TRUE only when all three entry tests pass and the look-ahead loop
// completes every one of its still_interval iterations; otherwise FALSE.
// NOTE(review): with still_interval == 0 the loop body never runs and
// j == still_interval holds immediately, so TRUE is returned as soon as the
// entry tests pass. One visible caller passes (cpi->key_frame_frequency - i);
// confirm that value is always positive.
static int detect_transition_to_still(
|
||||
VP8_COMP *cpi,
|
||||
int frame_interval,
|
||||
int still_interval,
|
||||
double loop_decay_rate,
|
||||
double decay_accumulator )
|
||||
{
|
||||
BOOL trans_to_still = FALSE;
|
||||
|
||||
// Break clause to detect very still sections after motion
|
||||
// For example a static image after a fade or other transition
|
||||
// instead of a clean scene cut.
|
||||
if ( (frame_interval > MIN_GF_INTERVAL) &&
|
||||
(loop_decay_rate >= 0.999) &&
|
||||
(decay_accumulator < 0.9) )
|
||||
{
|
||||
int j;
|
||||
// Remember the current stats read position so it can be restored below.
FIRSTPASS_STATS * position = cpi->stats_in;
|
||||
FIRSTPASS_STATS tmp_next_frame;
|
||||
double decay_rate;
|
||||
|
||||
// Look ahead a few frames to see if static condition
|
||||
// persists...
|
||||
for ( j = 0; j < still_interval; j++ )
|
||||
{
|
||||
// Stop early if the stats run out before the look-ahead completes.
if (EOF == input_stats(cpi, &tmp_next_frame))
|
||||
break;
|
||||
|
||||
decay_rate = get_prediction_decay_rate(cpi, &tmp_next_frame);
|
||||
// Any look-ahead frame below the 0.999 threshold ends the test early,
// leaving j < still_interval.
if ( decay_rate < 0.999 )
|
||||
break;
|
||||
}
|
||||
// Reset file position
|
||||
reset_fpf_position(cpi, position);
|
||||
|
||||
// Only if it does do we signal a transition to still
|
||||
// (j reaches still_interval only when no early break was taken).
if ( j == still_interval )
|
||||
trans_to_still = TRUE;
|
||||
}
|
||||
|
||||
return trans_to_still;
|
||||
}
|
||||
|
||||
// Analyse and define a gf/arf group .
|
||||
static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
{
|
||||
@@ -1406,8 +1365,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
int max_bits = frame_max_bits(cpi); // Max for a single frame
|
||||
|
||||
unsigned char *fpmm_pos;
|
||||
|
||||
unsigned int allow_alt_ref =
|
||||
cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
|
||||
|
||||
@@ -1416,8 +1373,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
vp8_clear_system_state(); //__asm emms;
|
||||
|
||||
fpmm_pos = vp8_fpmm_get_pos(cpi);
|
||||
|
||||
start_pos = cpi->stats_in;
|
||||
|
||||
vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
|
||||
@@ -1461,7 +1416,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
mod_err_per_mb_accumulator +=
|
||||
mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs);
|
||||
|
||||
if (EOF == vp8_input_stats(cpi, &next_frame))
|
||||
if (EOF == input_stats(cpi, &next_frame))
|
||||
break;
|
||||
|
||||
// Accumulate motion stats.
|
||||
@@ -1528,7 +1483,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
if (r > GF_RMAX)
|
||||
r = GF_RMAX;
|
||||
|
||||
loop_decay_rate = gf_prediction_decay_rate(cpi, &next_frame);
|
||||
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
|
||||
|
||||
// Cumulative effect of decay
|
||||
decay_accumulator = decay_accumulator * loop_decay_rate;
|
||||
@@ -1537,48 +1492,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
boost_score += (decay_accumulator * r);
|
||||
|
||||
// Break clause to detect very still sections after motion
|
||||
// For example a staic image after a fade or other transition
|
||||
// instead of a clean key frame.
|
||||
if ( (i > MIN_GF_INTERVAL) &&
|
||||
(loop_decay_rate >= 0.999) &&
|
||||
(decay_accumulator < 0.9) )
|
||||
// For example a static image after a fade or other transition.
|
||||
if ( detect_transition_to_still( cpi, i, 5,
|
||||
loop_decay_rate, decay_accumulator ) )
|
||||
{
|
||||
int j;
|
||||
FIRSTPASS_STATS * position = cpi->stats_in;
|
||||
FIRSTPASS_STATS tmp_next_frame;
|
||||
double decay_rate;
|
||||
|
||||
// Look ahead a few frames to see if static condition
|
||||
// persists...
|
||||
for ( j = 0; j < 4; j++ )
|
||||
{
|
||||
if (EOF == vp8_input_stats(cpi, &tmp_next_frame))
|
||||
break;
|
||||
|
||||
decay_rate = gf_prediction_decay_rate(cpi, &tmp_next_frame);
|
||||
if ( decay_rate < 0.999 )
|
||||
break;
|
||||
}
|
||||
reset_fpf_position(cpi, position); // Reset file position
|
||||
|
||||
// Force GF not alt ref
|
||||
if ( j == 4 )
|
||||
{
|
||||
if (0)
|
||||
{
|
||||
FILE *f = fopen("fadegf.stt", "a");
|
||||
fprintf(f, " %8d %8d %10.4f %10.4f %10.4f\n",
|
||||
cpi->common.current_video_frame+i, i,
|
||||
loop_decay_rate, decay_accumulator,
|
||||
boost_score );
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
allow_alt_ref = FALSE;
|
||||
|
||||
boost_score = old_boost_score;
|
||||
break;
|
||||
}
|
||||
allow_alt_ref = FALSE;
|
||||
boost_score = old_boost_score;
|
||||
break;
|
||||
}
|
||||
|
||||
// Break out conditions.
|
||||
@@ -1686,7 +1606,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
arf_frame_bits = (int)((double)Boost * (group_bits / (double)allocation_chunks));
|
||||
|
||||
// Estimate if there are enough bits available to make worthwhile use of an arf.
|
||||
tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits, cpi->common.Height, cpi->common.Width);
|
||||
tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits);
|
||||
|
||||
// Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames.
|
||||
if (tmp_q < cpi->worst_quality)
|
||||
@@ -1749,20 +1669,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
}
|
||||
|
||||
cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
|
||||
|
||||
{
|
||||
// Advance to & read in the motion map for those frames
|
||||
// to be considered for filtering based on the position
|
||||
// of the ARF
|
||||
vp8_fpmm_reset_pos(cpi, cpi->fp_motion_map_stats_save);
|
||||
|
||||
// Position at the 'earliest' frame to be filtered
|
||||
vp8_advance_fpmm(cpi,
|
||||
cpi->baseline_gf_interval - frames_bwd);
|
||||
|
||||
// Read / create a motion map for the region of interest
|
||||
vp8_input_fpmm(cpi);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1784,7 +1690,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
{
|
||||
while (cpi->baseline_gf_interval < cpi->frames_to_key)
|
||||
{
|
||||
if (EOF == vp8_input_stats(cpi, this_frame))
|
||||
if (EOF == input_stats(cpi, this_frame))
|
||||
break;
|
||||
|
||||
cpi->baseline_gf_interval++;
|
||||
@@ -1963,16 +1869,16 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
FIRSTPASS_STATS sectionstats;
|
||||
double Ratio;
|
||||
|
||||
vp8_zero_stats(§ionstats);
|
||||
zero_stats(§ionstats);
|
||||
reset_fpf_position(cpi, start_pos);
|
||||
|
||||
for (i = 0 ; i < cpi->baseline_gf_interval ; i++)
|
||||
{
|
||||
vp8_input_stats(cpi, &next_frame);
|
||||
vp8_accumulate_stats(§ionstats, &next_frame);
|
||||
input_stats(cpi, &next_frame);
|
||||
accumulate_stats(§ionstats, &next_frame);
|
||||
}
|
||||
|
||||
vp8_avg_stats(§ionstats);
|
||||
avg_stats(§ionstats);
|
||||
|
||||
cpi->section_intra_rating =
|
||||
sectionstats.intra_error /
|
||||
@@ -1992,9 +1898,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
reset_fpf_position(cpi, start_pos);
|
||||
}
|
||||
|
||||
// Reset the First pass motion map file position
|
||||
vp8_fpmm_reset_pos(cpi, fpmm_pos);
|
||||
}
|
||||
|
||||
// Allocate bits to a normal frame that is neither a gf an arf or a key frame.
|
||||
@@ -2073,16 +1976,9 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
|
||||
vp8_clear_system_state();
|
||||
|
||||
if (EOF == vp8_input_stats(cpi, &this_frame))
|
||||
if (EOF == input_stats(cpi, &this_frame))
|
||||
return;
|
||||
|
||||
vpx_memset(cpi->fp_motion_map, 0,
|
||||
cpi->oxcf.arnr_max_frames*cpi->common.MBs);
|
||||
cpi->fp_motion_map_stats_save = vp8_fpmm_get_pos(cpi);
|
||||
|
||||
// Step over this frame's first pass motion map
|
||||
vp8_advance_fpmm(cpi, 1);
|
||||
|
||||
this_frame_error = this_frame.ssim_weighted_pred_err;
|
||||
this_frame_intra_error = this_frame.intra_error;
|
||||
this_frame_coded_error = this_frame.coded_error;
|
||||
@@ -2101,7 +1997,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
// Define next KF group and assign bits to it
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
vp8_find_next_key_frame(cpi, &this_frame_copy);
|
||||
find_next_key_frame(cpi, &this_frame_copy);
|
||||
|
||||
// Special case: error_resilient_mode does not make much sense for two pass with its current meaning, but this code is designed to stop
|
||||
// outlandish behaviour if someone does set it when using two pass. It effectively disables GF groups.
|
||||
@@ -2214,8 +2110,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
est_cq =
|
||||
estimate_cq( cpi,
|
||||
(cpi->total_coded_error_left / frames_left),
|
||||
(int)(cpi->bits_left / frames_left),
|
||||
cpi->common.Height, cpi->common.Width);
|
||||
(int)(cpi->bits_left / frames_left));
|
||||
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
if ( est_cq > cpi->cq_target_quality )
|
||||
@@ -2227,9 +2122,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
cpi->maxq_min_limit = cpi->best_quality;
|
||||
tmp_q = estimate_max_q( cpi,
|
||||
(cpi->total_coded_error_left / frames_left),
|
||||
(int)(cpi->bits_left / frames_left),
|
||||
cpi->common.Height,
|
||||
cpi->common.Width);
|
||||
(int)(cpi->bits_left / frames_left));
|
||||
|
||||
// Limit the maxq value returned subsequently.
|
||||
// This increases the risk of overspend or underspend if the initial
|
||||
@@ -2257,7 +2150,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
if (frames_left < 1)
|
||||
frames_left = 1;
|
||||
|
||||
tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left), cpi->common.Height, cpi->common.Width);
|
||||
tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left));
|
||||
|
||||
// Move active_worst_quality but in a damped way
|
||||
if (tmp_q > cpi->active_worst_quality)
|
||||
@@ -2285,7 +2178,7 @@ static BOOL test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRST
|
||||
(next_frame->pcnt_second_ref < 0.10) &&
|
||||
((this_frame->pcnt_inter < 0.05) ||
|
||||
(
|
||||
(this_frame->pcnt_inter < .25) &&
|
||||
((this_frame->pcnt_inter - this_frame->pcnt_neutral) < .25) &&
|
||||
((this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) &&
|
||||
((fabs(last_frame->coded_error - this_frame->coded_error) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > .40) ||
|
||||
(fabs(last_frame->intra_error - this_frame->intra_error) / DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > .40) ||
|
||||
@@ -2332,7 +2225,9 @@ static BOOL test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRST
|
||||
// Test various breakout clauses
|
||||
if ((local_next_frame.pcnt_inter < 0.05) ||
|
||||
(next_iiratio < 1.5) ||
|
||||
((local_next_frame.pcnt_inter < 0.20) && (next_iiratio < 3.0)) ||
|
||||
(((local_next_frame.pcnt_inter -
|
||||
local_next_frame.pcnt_neutral) < 0.20) &&
|
||||
(next_iiratio < 3.0)) ||
|
||||
((boost_score - old_boost_score) < 0.5) ||
|
||||
(local_next_frame.intra_error < 200)
|
||||
)
|
||||
@@ -2343,7 +2238,7 @@ static BOOL test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRST
|
||||
old_boost_score = boost_score;
|
||||
|
||||
// Get the next frame details
|
||||
if (EOF == vp8_input_stats(cpi, &local_next_frame))
|
||||
if (EOF == input_stats(cpi, &local_next_frame))
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -2361,15 +2256,15 @@ static BOOL test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRST
|
||||
|
||||
return is_viable_kf;
|
||||
}
|
||||
void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
{
|
||||
int i;
|
||||
int i,j;
|
||||
FIRSTPASS_STATS last_frame;
|
||||
FIRSTPASS_STATS first_frame;
|
||||
FIRSTPASS_STATS next_frame;
|
||||
FIRSTPASS_STATS *start_position;
|
||||
|
||||
double decay_accumulator = 0;
|
||||
double decay_accumulator = 1.0;
|
||||
double boost_score = 0;
|
||||
double old_boost_score = 0.0;
|
||||
double loop_decay_rate;
|
||||
@@ -2379,6 +2274,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
double kf_group_intra_err = 0.0;
|
||||
double kf_group_coded_err = 0.0;
|
||||
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
|
||||
double recent_loop_decay[8] = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
|
||||
|
||||
vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
|
||||
|
||||
@@ -2407,6 +2303,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
kf_mod_err = calculate_modified_err(cpi, this_frame);
|
||||
|
||||
// find the next keyframe
|
||||
i = 0;
|
||||
while (cpi->stats_in < cpi->stats_in_end)
|
||||
{
|
||||
// Accumulate kf group error
|
||||
@@ -2419,15 +2316,40 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
// load the next frame's stats
|
||||
vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
|
||||
vp8_input_stats(cpi, this_frame);
|
||||
input_stats(cpi, this_frame);
|
||||
|
||||
// Provided that we are not at the end of the file...
|
||||
if (cpi->oxcf.auto_key
|
||||
&& lookup_next_frame_stats(cpi, &next_frame) != EOF)
|
||||
{
|
||||
// Normal scene cut check
|
||||
if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
|
||||
break;
|
||||
|
||||
// How fast is prediction quality decaying
|
||||
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
|
||||
|
||||
// We want to know something about the recent past... rather than
|
||||
// as used elsewhere where we are concerned with decay in prediction
|
||||
// quality since the last GF or KF.
|
||||
recent_loop_decay[i%8] = loop_decay_rate;
|
||||
decay_accumulator = 1.0;
|
||||
for (j = 0; j < 8; j++)
|
||||
{
|
||||
decay_accumulator = decay_accumulator * recent_loop_decay[j];
|
||||
}
|
||||
|
||||
// Special check for transition or high motion followed by a
|
||||
// static scene.
|
||||
if ( detect_transition_to_still( cpi, i,
|
||||
(cpi->key_frame_frequency-i),
|
||||
loop_decay_rate,
|
||||
decay_accumulator ) )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// Step on to the next frame
|
||||
cpi->frames_to_key ++;
|
||||
|
||||
@@ -2437,6 +2359,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
break;
|
||||
} else
|
||||
cpi->frames_to_key ++;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
// If there is a max kf interval set by the user we must obey it.
|
||||
@@ -2470,7 +2394,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
kf_group_coded_err += tmp_frame.coded_error;
|
||||
|
||||
// Load the next frame's stats
|
||||
vp8_input_stats(cpi, &tmp_frame);
|
||||
input_stats(cpi, &tmp_frame);
|
||||
}
|
||||
|
||||
// Reset to the start of the group
|
||||
@@ -2575,7 +2499,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
double motion_decay;
|
||||
double motion_pct;
|
||||
|
||||
if (EOF == vp8_input_stats(cpi, &next_frame))
|
||||
if (EOF == input_stats(cpi, &next_frame))
|
||||
break;
|
||||
|
||||
if (next_frame.intra_error > cpi->kf_intra_err_min)
|
||||
@@ -2588,32 +2512,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
if (r > RMAX)
|
||||
r = RMAX;
|
||||
|
||||
// Adjust loop decay rate
|
||||
//if ( next_frame.pcnt_inter < loop_decay_rate )
|
||||
loop_decay_rate = next_frame.pcnt_inter;
|
||||
|
||||
// High % motion -> somewhat higher decay rate
|
||||
motion_pct = next_frame.pcnt_motion;
|
||||
motion_decay = (1.0 - (motion_pct / 20.0));
|
||||
if (motion_decay < loop_decay_rate)
|
||||
loop_decay_rate = motion_decay;
|
||||
|
||||
// Adjustment to decay rate based on speed of motion
|
||||
{
|
||||
double this_mv_rabs;
|
||||
double this_mv_cabs;
|
||||
double distance_factor;
|
||||
|
||||
this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
|
||||
this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
|
||||
|
||||
distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
|
||||
(this_mv_cabs * this_mv_cabs)) / 250.0;
|
||||
distance_factor = ((distance_factor > 1.0)
|
||||
? 0.0 : (1.0 - distance_factor));
|
||||
if (distance_factor < loop_decay_rate)
|
||||
loop_decay_rate = distance_factor;
|
||||
}
|
||||
// How fast is prediction quality decaying
|
||||
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
|
||||
|
||||
decay_accumulator = decay_accumulator * loop_decay_rate;
|
||||
decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
|
||||
@@ -2634,16 +2534,16 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
FIRSTPASS_STATS sectionstats;
|
||||
double Ratio;
|
||||
|
||||
vp8_zero_stats(§ionstats);
|
||||
zero_stats(§ionstats);
|
||||
reset_fpf_position(cpi, start_position);
|
||||
|
||||
for (i = 0 ; i < cpi->frames_to_key ; i++)
|
||||
{
|
||||
vp8_input_stats(cpi, &next_frame);
|
||||
vp8_accumulate_stats(§ionstats, &next_frame);
|
||||
input_stats(cpi, &next_frame);
|
||||
accumulate_stats(§ionstats, &next_frame);
|
||||
}
|
||||
|
||||
vp8_avg_stats(§ionstats);
|
||||
avg_stats(§ionstats);
|
||||
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
|
||||
@@ -2859,7 +2759,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
|
||||
|
||||
// Work out if spatial resampling is necessary
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, new_height, new_width, group_iiratio);
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame, bits_per_frame, group_iiratio);
|
||||
|
||||
// If we project a required Q higher than the maximum allowed Q then make a guess at the actual size of frames in this section
|
||||
projected_bits_perframe = bits_per_frame;
|
||||
@@ -2930,7 +2830,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0;
|
||||
|
||||
// Now try again and see what Q we get with the smaller image size
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, new_height, new_width, group_iiratio);
|
||||
kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, bits_per_frame, group_iiratio);
|
||||
|
||||
if (0)
|
||||
{
|
||||
|
@@ -17,8 +17,6 @@
|
||||
void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
|
||||
void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
|
||||
|
||||
|
||||
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
|
||||
extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
|
||||
|
||||
void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
@@ -103,6 +101,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
// Pure C:
|
||||
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
cpi->rtcd.variance.ssimpf_8x8 = ssim_parms_8x8_c;
|
||||
cpi->rtcd.variance.ssimpf = ssim_parms_c;
|
||||
#endif
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
vp8_arch_x86_encoder_init(cpi);
|
||||
|
@@ -43,7 +43,7 @@ int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
|
||||
return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
|
||||
}
|
||||
|
||||
int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
|
||||
static int mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
|
||||
{
|
||||
//int i;
|
||||
//return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
|
||||
@@ -221,7 +221,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
|
||||
// calculate central point error
|
||||
besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
|
||||
besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
// TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
|
||||
while (--halfiters)
|
||||
@@ -337,13 +337,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
|
||||
// calculate central point error
|
||||
bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
|
||||
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
// go left then right and check error
|
||||
this_mv.row = startmv.row;
|
||||
this_mv.col = ((startmv.col - 8) | 4);
|
||||
left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
|
||||
left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (left < bestmse)
|
||||
{
|
||||
@@ -353,7 +353,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
|
||||
this_mv.col += 8;
|
||||
right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (right < bestmse)
|
||||
{
|
||||
@@ -365,7 +365,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
this_mv.col = startmv.col;
|
||||
this_mv.row = ((startmv.row - 8) | 4);
|
||||
up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
|
||||
up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (up < bestmse)
|
||||
{
|
||||
@@ -375,7 +375,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
|
||||
this_mv.row += 8;
|
||||
down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (down < bestmse)
|
||||
{
|
||||
@@ -415,7 +415,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
break;
|
||||
}
|
||||
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -451,7 +451,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
|
||||
}
|
||||
|
||||
left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (left < bestmse)
|
||||
{
|
||||
@@ -461,7 +461,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
|
||||
this_mv.col += 4;
|
||||
right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
|
||||
right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (right < bestmse)
|
||||
{
|
||||
@@ -483,7 +483,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
|
||||
}
|
||||
|
||||
up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (up < bestmse)
|
||||
{
|
||||
@@ -493,7 +493,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
|
||||
this_mv.row += 4;
|
||||
down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
|
||||
down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (down < bestmse)
|
||||
{
|
||||
@@ -582,7 +582,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
break;
|
||||
}
|
||||
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -621,13 +621,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
|
||||
// calculate central point error
|
||||
bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
|
||||
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
// go left then right and check error
|
||||
this_mv.row = startmv.row;
|
||||
this_mv.col = ((startmv.col - 8) | 4);
|
||||
left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
|
||||
left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (left < bestmse)
|
||||
{
|
||||
@@ -637,7 +637,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
|
||||
this_mv.col += 8;
|
||||
right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (right < bestmse)
|
||||
{
|
||||
@@ -649,7 +649,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
this_mv.col = startmv.col;
|
||||
this_mv.row = ((startmv.row - 8) | 4);
|
||||
up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
|
||||
up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (up < bestmse)
|
||||
{
|
||||
@@ -659,7 +659,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
|
||||
this_mv.row += 8;
|
||||
down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (down < bestmse)
|
||||
{
|
||||
@@ -697,7 +697,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
break;
|
||||
}
|
||||
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -709,7 +709,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
this_mv.col = (this_mv.col - 8) | 4;
|
||||
this_mv.row = (this_mv.row - 8) | 4;
|
||||
diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -719,7 +719,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
|
||||
this_mv.col += 8;
|
||||
diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -730,7 +730,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
this_mv.col = (this_mv.col - 8) | 4;
|
||||
this_mv.row = startmv.row + 4;
|
||||
diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -740,7 +740,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
|
||||
|
||||
this_mv.col += 8;
|
||||
diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
|
||||
if (diag < bestmse)
|
||||
{
|
||||
@@ -894,7 +894,7 @@ cal_neighbors:
|
||||
best_mv->row = br;
|
||||
best_mv->col = bc;
|
||||
|
||||
return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + vp8_mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
|
||||
return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
|
||||
}
|
||||
#undef MVC
|
||||
#undef PRE
|
||||
@@ -955,7 +955,7 @@ int vp8_diamond_search_sad
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Check the starting position
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// search_param determines the length of the initial step and hence the number of iterations
|
||||
@@ -986,7 +986,7 @@ int vp8_diamond_search_sad
|
||||
{
|
||||
this_mv.row = this_row_offset << 3;
|
||||
this_mv.col = this_col_offset << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1017,7 +1017,7 @@ int vp8_diamond_search_sad
|
||||
return INT_MAX;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
}
|
||||
|
||||
int vp8_diamond_search_sadx4
|
||||
@@ -1071,7 +1071,7 @@ int vp8_diamond_search_sadx4
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Check the starting position
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// search_param determines the length of the initial step and hence the number of iterations
|
||||
@@ -1113,7 +1113,7 @@ int vp8_diamond_search_sadx4
|
||||
{
|
||||
this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
|
||||
this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
|
||||
sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
sad_array[t] += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (sad_array[t] < bestsad)
|
||||
{
|
||||
@@ -1142,7 +1142,7 @@ int vp8_diamond_search_sadx4
|
||||
{
|
||||
this_mv.row = this_row_offset << 3;
|
||||
this_mv.col = this_col_offset << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1173,7 +1173,7 @@ int vp8_diamond_search_sadx4
|
||||
return INT_MAX;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
}
|
||||
|
||||
|
||||
@@ -1215,8 +1215,8 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
{
|
||||
// Baseline value at the centre
|
||||
|
||||
//bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
//bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1242,9 +1242,9 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
|
||||
this_mv.col = c << 3;
|
||||
//thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
//thissad += (int)sqrt(mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
//thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1263,7 +1263,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
@@ -1306,7 +1306,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Baseline value at the centre
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1341,7 +1341,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1364,7 +1364,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1386,7 +1386,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
@@ -1415,7 +1415,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
unsigned short sad_array8[8];
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
|
||||
unsigned int sad_array[3];
|
||||
|
||||
// Work out the mid point for the search
|
||||
@@ -1430,7 +1430,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Baseline value at the centre
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1465,7 +1465,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1494,7 +1494,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1517,7 +1517,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1538,7 +1538,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
|
@@ -70,7 +70,6 @@ extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_
|
||||
|
||||
int vp8_estimate_entropy_savings(VP8_COMP *cpi);
|
||||
int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
|
||||
int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
|
||||
|
||||
extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi);
|
||||
|
||||
@@ -86,9 +85,11 @@ extern double vp8_calc_ssim
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight
|
||||
double *weight,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd
|
||||
);
|
||||
|
||||
|
||||
extern double vp8_calc_ssimg
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
@@ -259,7 +260,7 @@ static void setup_features(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
|
||||
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
|
||||
static void dealloc_compressor_data(VP8_COMP *cpi)
|
||||
{
|
||||
vpx_free(cpi->tplist);
|
||||
cpi->tplist = NULL;
|
||||
@@ -281,12 +282,6 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
|
||||
vpx_free(cpi->active_map);
|
||||
cpi->active_map = 0;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
// Delete first pass motion map
|
||||
vpx_free(cpi->fp_motion_map);
|
||||
cpi->fp_motion_map = 0;
|
||||
#endif
|
||||
|
||||
vp8_de_alloc_frame_buffers(&cpi->common);
|
||||
|
||||
vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
|
||||
@@ -1360,11 +1355,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
vpx_free(cpi->total_stats);
|
||||
|
||||
cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
|
||||
cpi->total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
|
||||
|
||||
vpx_free(cpi->this_frame_stats);
|
||||
|
||||
cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
|
||||
cpi->this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
|
||||
|
||||
if(!cpi->total_stats || !cpi->this_frame_stats)
|
||||
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
|
||||
@@ -1457,13 +1452,12 @@ rescale(int val, int num, int denom)
|
||||
}
|
||||
|
||||
|
||||
void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *)(ptr);
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
|
||||
if (!cpi)
|
||||
return;
|
||||
cpi->oxcf = *oxcf;
|
||||
|
||||
cpi->auto_gold = 1;
|
||||
cpi->auto_adjust_gold_quantizer = 1;
|
||||
@@ -1475,299 +1469,31 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->version = oxcf->Version;
|
||||
vp8_setup_version(cm);
|
||||
|
||||
if (oxcf == 0)
|
||||
{
|
||||
cpi->pass = 0;
|
||||
// change includes all joint functionality
|
||||
vp8_change_config(ptr, oxcf);
|
||||
|
||||
cpi->auto_worst_q = 0;
|
||||
cpi->oxcf.best_allowed_q = MINQ;
|
||||
cpi->oxcf.worst_allowed_q = MAXQ;
|
||||
cpi->oxcf.cq_level = MINQ;
|
||||
// Initialize active best and worst q and average q values.
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
|
||||
cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
cpi->oxcf.starting_buffer_level = 4000;
|
||||
cpi->oxcf.optimal_buffer_level = 5000;
|
||||
cpi->oxcf.maximum_buffer_size = 6000;
|
||||
cpi->oxcf.under_shoot_pct = 90;
|
||||
cpi->oxcf.allow_df = 0;
|
||||
cpi->oxcf.drop_frames_water_mark = 20;
|
||||
|
||||
cpi->oxcf.allow_spatial_resampling = 0;
|
||||
cpi->oxcf.resample_down_water_mark = 40;
|
||||
cpi->oxcf.resample_up_water_mark = 60;
|
||||
|
||||
cpi->oxcf.fixed_q = cpi->interquantizer;
|
||||
|
||||
cpi->filter_type = NORMAL_LOOPFILTER;
|
||||
|
||||
if (cm->simpler_lpf)
|
||||
cpi->filter_type = SIMPLE_LOOPFILTER;
|
||||
|
||||
cpi->compressor_speed = 1;
|
||||
cpi->horiz_scale = 0;
|
||||
cpi->vert_scale = 0;
|
||||
cpi->oxcf.two_pass_vbrbias = 50;
|
||||
cpi->oxcf.two_pass_vbrmax_section = 400;
|
||||
cpi->oxcf.two_pass_vbrmin_section = 0;
|
||||
|
||||
cpi->oxcf.Sharpness = 0;
|
||||
cpi->oxcf.noise_sensitivity = 0;
|
||||
}
|
||||
else
|
||||
cpi->oxcf = *oxcf;
|
||||
|
||||
|
||||
switch (cpi->oxcf.Mode)
|
||||
{
|
||||
|
||||
case MODE_REALTIME:
|
||||
cpi->pass = 0;
|
||||
cpi->compressor_speed = 2;
|
||||
|
||||
if (cpi->oxcf.cpu_used < -16)
|
||||
{
|
||||
cpi->oxcf.cpu_used = -16;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.cpu_used > 16)
|
||||
cpi->oxcf.cpu_used = 16;
|
||||
|
||||
break;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
case MODE_GOODQUALITY:
|
||||
cpi->pass = 0;
|
||||
cpi->compressor_speed = 1;
|
||||
|
||||
if (cpi->oxcf.cpu_used < -5)
|
||||
{
|
||||
cpi->oxcf.cpu_used = -5;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.cpu_used > 5)
|
||||
cpi->oxcf.cpu_used = 5;
|
||||
|
||||
break;
|
||||
|
||||
case MODE_BESTQUALITY:
|
||||
cpi->pass = 0;
|
||||
cpi->compressor_speed = 0;
|
||||
break;
|
||||
|
||||
case MODE_FIRSTPASS:
|
||||
cpi->pass = 1;
|
||||
cpi->compressor_speed = 1;
|
||||
break;
|
||||
case MODE_SECONDPASS:
|
||||
cpi->pass = 2;
|
||||
cpi->compressor_speed = 1;
|
||||
|
||||
if (cpi->oxcf.cpu_used < -5)
|
||||
{
|
||||
cpi->oxcf.cpu_used = -5;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.cpu_used > 5)
|
||||
cpi->oxcf.cpu_used = 5;
|
||||
|
||||
break;
|
||||
case MODE_SECONDPASS_BEST:
|
||||
cpi->pass = 2;
|
||||
cpi->compressor_speed = 0;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (cpi->pass == 0)
|
||||
cpi->auto_worst_q = 1;
|
||||
|
||||
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
|
||||
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
|
||||
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
|
||||
|
||||
if (oxcf->fixed_q >= 0)
|
||||
{
|
||||
if (oxcf->worst_allowed_q < 0)
|
||||
cpi->oxcf.fixed_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.fixed_q = q_trans[oxcf->worst_allowed_q];
|
||||
|
||||
if (oxcf->alt_q < 0)
|
||||
cpi->oxcf.alt_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.alt_q = q_trans[oxcf->alt_q];
|
||||
|
||||
if (oxcf->key_q < 0)
|
||||
cpi->oxcf.key_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.key_q = q_trans[oxcf->key_q];
|
||||
|
||||
if (oxcf->gold_q < 0)
|
||||
cpi->oxcf.gold_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.gold_q = q_trans[oxcf->gold_q];
|
||||
|
||||
}
|
||||
|
||||
cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
|
||||
|
||||
//cpi->use_golden_frame_only = 0;
|
||||
//cpi->use_last_frame_only = 0;
|
||||
cm->refresh_golden_frame = 0;
|
||||
cm->refresh_last_frame = 1;
|
||||
cm->refresh_entropy_probs = 1;
|
||||
|
||||
if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3)
|
||||
cm->multi_token_partition = (TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
||||
|
||||
setup_features(cpi);
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
|
||||
}
|
||||
|
||||
// At the moment the first order values may not be > MAXQ
|
||||
if (cpi->oxcf.fixed_q > MAXQ)
|
||||
cpi->oxcf.fixed_q = MAXQ;
|
||||
|
||||
// local file playback mode == really big buffer
|
||||
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
|
||||
{
|
||||
cpi->oxcf.starting_buffer_level = 60000;
|
||||
cpi->oxcf.optimal_buffer_level = 60000;
|
||||
cpi->oxcf.maximum_buffer_size = 240000;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Convert target bandwidth from Kbit/s to Bit/s
|
||||
cpi->oxcf.target_bandwidth *= 1000;
|
||||
// Initialise the starting buffer levels
|
||||
cpi->oxcf.starting_buffer_level =
|
||||
rescale(cpi->oxcf.starting_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
cpi->oxcf.optimal_buffer_level =
|
||||
rescale(cpi->oxcf.optimal_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
if (cpi->oxcf.maximum_buffer_size == 0)
|
||||
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
cpi->oxcf.maximum_buffer_size =
|
||||
rescale(cpi->oxcf.maximum_buffer_size,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||
|
||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
|
||||
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
||||
|
||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
|
||||
cpi->total_actual_bits = 0;
|
||||
cpi->total_target_vs_actual = 0;
|
||||
|
||||
// Only allow dropped frames in buffered mode
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
|
||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||
|
||||
if (!cm->use_bilinear_mc_filter)
|
||||
cm->mcomp_filter_type = SIXTAP;
|
||||
else
|
||||
cm->mcomp_filter_type = BILINEAR;
|
||||
|
||||
cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
|
||||
|
||||
cm->Width = cpi->oxcf.Width ;
|
||||
cm->Height = cpi->oxcf.Height ;
|
||||
|
||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000; // As per VP8
|
||||
|
||||
cm->horiz_scale = cpi->horiz_scale;
|
||||
cm->vert_scale = cpi->vert_scale ;
|
||||
|
||||
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
|
||||
if (cpi->oxcf.Sharpness > 7)
|
||||
cpi->oxcf.Sharpness = 7;
|
||||
|
||||
cm->sharpness_level = cpi->oxcf.Sharpness;
|
||||
|
||||
if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL)
|
||||
{
|
||||
int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
|
||||
int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
|
||||
|
||||
Scale2Ratio(cm->horiz_scale, &hr, &hs);
|
||||
Scale2Ratio(cm->vert_scale, &vr, &vs);
|
||||
|
||||
// always go to the next whole number
|
||||
cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
|
||||
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
|
||||
}
|
||||
|
||||
if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
||||
{
|
||||
alloc_raw_frame_buffers(cpi);
|
||||
vp8_alloc_compressor_data(cpi);
|
||||
}
|
||||
|
||||
// Clamp KF frame size to quarter of data rate
|
||||
if (cpi->intra_frame_target > cpi->target_bandwidth >> 2)
|
||||
cpi->intra_frame_target = cpi->target_bandwidth >> 2;
|
||||
|
||||
if (cpi->oxcf.fixed_q >= 0)
|
||||
{
|
||||
cpi->last_q[0] = cpi->oxcf.fixed_q;
|
||||
cpi->last_q[1] = cpi->oxcf.fixed_q;
|
||||
}
|
||||
|
||||
cpi->Speed = cpi->oxcf.cpu_used;
|
||||
|
||||
// force to allowlag to 0 if lag_in_frames is 0;
|
||||
if (cpi->oxcf.lag_in_frames == 0)
|
||||
{
|
||||
cpi->oxcf.allow_lag = 0;
|
||||
}
|
||||
// Limit on lag buffers as these are not currently dynamically allocated
|
||||
else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
|
||||
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
|
||||
|
||||
// YX Temp
|
||||
cpi->last_alt_ref_sei = -1;
|
||||
cpi->is_src_frame_alt_ref = 0;
|
||||
cpi->is_next_src_alt_ref = 0;
|
||||
|
||||
#if 0
|
||||
// Experimental RD Code
|
||||
cpi->frame_distortion = 0;
|
||||
cpi->last_frame_distortion = 0;
|
||||
#endif
|
||||
cpi->total_target_vs_actual = 0;
|
||||
|
||||
#if VP8_TEMPORAL_ALT_REF
|
||||
|
||||
cpi->use_weighted_temporal_filter = 0;
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1779,12 +1505,6 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This function needs more clean up, i.e. be more tuned torwards
|
||||
* change_config rather than init_config !!!!!!!!!!!!!!!!
|
||||
* YX - 5/28/2009
|
||||
*
|
||||
*/
|
||||
|
||||
void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
{
|
||||
@@ -1897,7 +1617,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
|
||||
}
|
||||
|
||||
cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
cpi->baseline_gf_interval =
|
||||
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
|
||||
cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
|
||||
|
||||
@@ -1908,7 +1629,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->refresh_entropy_probs = 1;
|
||||
|
||||
if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3)
|
||||
cm->multi_token_partition = (TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
||||
cm->multi_token_partition =
|
||||
(TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
||||
|
||||
setup_features(cpi);
|
||||
|
||||
@@ -1929,16 +1651,12 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cpi->oxcf.starting_buffer_level = 60000;
|
||||
cpi->oxcf.optimal_buffer_level = 60000;
|
||||
cpi->oxcf.maximum_buffer_size = 240000;
|
||||
|
||||
}
|
||||
|
||||
// Convert target bandwidth from Kbit/s to Bit/s
|
||||
cpi->oxcf.target_bandwidth *= 1000;
|
||||
|
||||
cpi->oxcf.starting_buffer_level =
|
||||
rescale(cpi->oxcf.starting_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
// Set or reset optimal and maximum buffer levels.
|
||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
@@ -1953,31 +1671,41 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
rescale(cpi->oxcf.maximum_buffer_size,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||
|
||||
// Set up frame rate and related parameters rate control values.
|
||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
||||
|
||||
// Set absolute upper and lower quality limits
|
||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
|
||||
// active values should only be modified if out of new range
|
||||
if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q)
|
||||
{
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
}
|
||||
// less likely
|
||||
else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q)
|
||||
{
|
||||
cpi->active_worst_quality = cpi->oxcf.best_allowed_q;
|
||||
}
|
||||
if (cpi->active_best_quality < cpi->oxcf.best_allowed_q)
|
||||
{
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
}
|
||||
// less likely
|
||||
else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q)
|
||||
{
|
||||
cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
|
||||
}
|
||||
|
||||
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
||||
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
|
||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
|
||||
cpi->total_actual_bits = 0;
|
||||
cpi->total_target_vs_actual = 0;
|
||||
|
||||
// Only allow dropped frames in buffered mode
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
|
||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||
|
||||
if (!cm->use_bilinear_mc_filter)
|
||||
cm->mcomp_filter_type = SIXTAP;
|
||||
@@ -1992,7 +1720,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->horiz_scale = cpi->horiz_scale;
|
||||
cm->vert_scale = cpi->vert_scale ;
|
||||
|
||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000; // As per VP8
|
||||
// As per VP8
|
||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000;
|
||||
|
||||
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
|
||||
if (cpi->oxcf.Sharpness > 7)
|
||||
@@ -2013,8 +1742,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
|
||||
}
|
||||
|
||||
if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
if (((cm->Width + 15) & 0xfffffff0) !=
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) !=
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
||||
{
|
||||
alloc_raw_frame_buffers(cpi);
|
||||
@@ -2112,7 +1843,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
vp8_create_common(&cpi->common);
|
||||
vp8_cmachine_specific_config(cpi);
|
||||
|
||||
vp8_init_config((VP8_PTR)cpi, oxcf);
|
||||
init_config((VP8_PTR)cpi, oxcf);
|
||||
|
||||
memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob));
|
||||
cpi->common.current_video_frame = 0;
|
||||
@@ -2153,12 +1884,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
|
||||
cpi->active_map_enabled = 0;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
// Create the first pass motion map structure and set to 0
|
||||
// Allocate space for maximum of 15 buffers
|
||||
CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// Experimental code for lagged and one pass
|
||||
// Initialise one_pass GF frames stats
|
||||
@@ -2308,7 +2033,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
|
||||
}
|
||||
else if (cpi->pass == 2)
|
||||
{
|
||||
size_t packet_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
|
||||
size_t packet_sz = sizeof(FIRSTPASS_STATS);
|
||||
int packets = oxcf->two_pass_stats_in.sz / packet_sz;
|
||||
|
||||
cpi->stats_in = oxcf->two_pass_stats_in.buf;
|
||||
@@ -2619,7 +2344,7 @@ void vp8_remove_compressor(VP8_PTR *ptr)
|
||||
vp8cx_remove_encoder_threads(cpi);
|
||||
#endif
|
||||
|
||||
vp8_dealloc_compressor_data(cpi);
|
||||
dealloc_compressor_data(cpi);
|
||||
vpx_free(cpi->mb.ss);
|
||||
vpx_free(cpi->tok);
|
||||
vpx_free(cpi->cyclic_refresh_map);
|
||||
@@ -3509,6 +3234,89 @@ static BOOL recode_loop_test( VP8_COMP *cpi,
|
||||
return force_recode;
|
||||
}
|
||||
|
||||
void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
|
||||
{
|
||||
if (cm->no_lpf)
|
||||
{
|
||||
cm->filter_level = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
struct vpx_usec_timer timer;
|
||||
|
||||
vp8_clear_system_state();
|
||||
|
||||
vpx_usec_timer_start(&timer);
|
||||
if (cpi->sf.auto_filter == 0)
|
||||
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
|
||||
|
||||
else
|
||||
vp8cx_pick_filter_level(cpi->Source, cpi);
|
||||
|
||||
vpx_usec_timer_mark(&timer);
|
||||
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
|
||||
}
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */
|
||||
#endif
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
{
|
||||
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
|
||||
cm->last_filter_type = cm->filter_type;
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
|
||||
|
||||
{
|
||||
YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
|
||||
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
|
||||
YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
|
||||
YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
|
||||
// At this point the new frame has been encoded.
|
||||
// If any buffer copy / swapping is signaled it should be done here.
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
|
||||
vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
|
||||
}
|
||||
else // For non key frames
|
||||
{
|
||||
// Code to copy between reference buffers
|
||||
if (cm->copy_buffer_to_arf)
|
||||
{
|
||||
if (cm->copy_buffer_to_arf == 1)
|
||||
{
|
||||
if (cm->refresh_last_frame)
|
||||
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
|
||||
vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
|
||||
else
|
||||
vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
|
||||
}
|
||||
else if (cm->copy_buffer_to_arf == 2)
|
||||
vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
|
||||
}
|
||||
|
||||
if (cm->copy_buffer_to_gf)
|
||||
{
|
||||
if (cm->copy_buffer_to_gf == 1)
|
||||
{
|
||||
if (cm->refresh_last_frame)
|
||||
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
|
||||
vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
|
||||
else
|
||||
vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
|
||||
}
|
||||
else if (cm->copy_buffer_to_gf == 2)
|
||||
vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void encode_frame_to_data_rate
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
@@ -3542,6 +3350,7 @@ static void encode_frame_to_data_rate
|
||||
int drop_mark50 = drop_mark / 4;
|
||||
int drop_mark25 = drop_mark / 8;
|
||||
|
||||
|
||||
// Clear down mmx registers to allow floating point in what follows
|
||||
vp8_clear_system_state();
|
||||
|
||||
@@ -3862,11 +3671,12 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
}
|
||||
|
||||
// If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames
|
||||
// to prevent bits just going to waste.
|
||||
// If CBR and the buffer is as full then it is reasonable to allow
|
||||
// higher quality on the frames to prevent bits just going to waste.
|
||||
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
|
||||
{
|
||||
// Note that the use of >= here elliminates the risk of a devide by 0 error in the else if clause
|
||||
// Note that the use of >= here elliminates the risk of a devide
|
||||
// by 0 error in the else if clause
|
||||
if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size)
|
||||
cpi->active_best_quality = cpi->best_quality;
|
||||
|
||||
@@ -3879,6 +3689,20 @@ static void encode_frame_to_data_rate
|
||||
}
|
||||
}
|
||||
}
|
||||
// Make sure constrained quality mode limits are adhered to for the first
|
||||
// few frames of one pass encodes
|
||||
else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
|
||||
{
|
||||
if ( (cm->frame_type == KEY_FRAME) ||
|
||||
cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame )
|
||||
{
|
||||
cpi->active_best_quality = cpi->best_quality;
|
||||
}
|
||||
else if (cpi->active_best_quality < cpi->cq_target_quality)
|
||||
{
|
||||
cpi->active_best_quality = cpi->cq_target_quality;
|
||||
}
|
||||
}
|
||||
|
||||
// Clip the active best and worst quality values to limits
|
||||
if (cpi->active_worst_quality > cpi->worst_quality)
|
||||
@@ -4058,8 +3882,8 @@ static void encode_frame_to_data_rate
|
||||
vp8_setup_key_frame(cpi);
|
||||
|
||||
// transform / motion compensation build reconstruction frame
|
||||
|
||||
vp8_encode_frame(cpi);
|
||||
|
||||
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
|
||||
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
|
||||
|
||||
@@ -4408,92 +4232,43 @@ static void encode_frame_to_data_rate
|
||||
else
|
||||
cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
|
||||
|
||||
if (cm->no_lpf)
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
if (cpi->b_multi_threaded)
|
||||
{
|
||||
cm->filter_level = 0;
|
||||
sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
struct vpx_usec_timer timer;
|
||||
|
||||
vpx_usec_timer_start(&timer);
|
||||
|
||||
if (cpi->sf.auto_filter == 0)
|
||||
vp8cx_pick_filter_level_fast(cpi->Source, cpi);
|
||||
else
|
||||
vp8cx_pick_filter_level(cpi->Source, cpi);
|
||||
|
||||
vpx_usec_timer_mark(&timer);
|
||||
|
||||
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
|
||||
loopfilter_frame(cpi, cm);
|
||||
}
|
||||
|
||||
if (cm->filter_level > 0)
|
||||
{
|
||||
vp8cx_set_alt_lf_level(cpi, cm->filter_level);
|
||||
vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level);
|
||||
cm->last_filter_type = cm->filter_type;
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
/* Move storing frame_type out of the above loop since it is also
|
||||
* needed in motion search besides loopfilter */
|
||||
cm->last_frame_type = cm->frame_type;
|
||||
|
||||
vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
|
||||
|
||||
if (cpi->oxcf.error_resilient_mode == 1)
|
||||
{
|
||||
cm->refresh_entropy_probs = 0;
|
||||
}
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* wait that filter_level is picked so that we can continue with stream packing */
|
||||
if (cpi->b_multi_threaded)
|
||||
sem_wait(&cpi->h_event_end_lpf);
|
||||
#endif
|
||||
|
||||
// build the bitstream
|
||||
vp8_pack_bitstream(cpi, dest, size);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* wait for loopfilter thread done */
|
||||
if (cpi->b_multi_threaded)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
|
||||
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
|
||||
YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
|
||||
YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
|
||||
// At this point the new frame has been encoded coded.
|
||||
// If any buffer copy / swaping is signalled it should be done here.
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
|
||||
vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
|
||||
}
|
||||
else // For non key frames
|
||||
{
|
||||
// Code to copy between reference buffers
|
||||
if (cm->copy_buffer_to_arf)
|
||||
{
|
||||
if (cm->copy_buffer_to_arf == 1)
|
||||
{
|
||||
if (cm->refresh_last_frame)
|
||||
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
|
||||
vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
|
||||
else
|
||||
vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
|
||||
}
|
||||
else if (cm->copy_buffer_to_arf == 2)
|
||||
vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
|
||||
}
|
||||
|
||||
if (cm->copy_buffer_to_gf)
|
||||
{
|
||||
if (cm->copy_buffer_to_gf == 1)
|
||||
{
|
||||
if (cm->refresh_last_frame)
|
||||
// We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
|
||||
vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
|
||||
else
|
||||
vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
|
||||
}
|
||||
else if (cm->copy_buffer_to_gf == 2)
|
||||
vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
|
||||
}
|
||||
}
|
||||
sem_wait(&cpi->h_event_end_lpf);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Move storing frame_type out of the above loop since it is also
|
||||
* needed in motion search besides loopfilter */
|
||||
cm->last_frame_type = cm->frame_type;
|
||||
|
||||
// Update rate control heuristics
|
||||
cpi->total_byte_count += (*size);
|
||||
@@ -4817,18 +4592,8 @@ static void encode_frame_to_data_rate
|
||||
|
||||
}
|
||||
|
||||
int vp8_is_gf_update_needed(VP8_PTR ptr)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *) ptr;
|
||||
int ret_val;
|
||||
|
||||
ret_val = cpi->gf_update_recommended;
|
||||
cpi->gf_update_recommended = 0;
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
void vp8_check_gf_quality(VP8_COMP *cpi)
|
||||
static void check_gf_quality(VP8_COMP *cpi)
|
||||
{
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);
|
||||
@@ -5077,7 +4842,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
if (start_frame < 0)
|
||||
start_frame += cpi->oxcf.lag_in_frames;
|
||||
|
||||
besterr = vp8_calc_low_ss_err(&cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer,
|
||||
besterr = calc_low_ss_err(&cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer,
|
||||
&cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
for (i = 0; i < 7; i++)
|
||||
@@ -5086,7 +4851,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
cpi->oxcf.arnr_strength = i;
|
||||
vp8_temporal_filter_prepare_c(cpi);
|
||||
|
||||
thiserr = vp8_calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
|
||||
thiserr = calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
|
||||
&cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
if (10 * thiserr < besterr * 8)
|
||||
@@ -5229,7 +4994,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
|
||||
if (cpi->compressor_speed == 2)
|
||||
{
|
||||
vp8_check_gf_quality(cpi);
|
||||
check_gf_quality(cpi);
|
||||
vpx_usec_timer_start(&tsctimer);
|
||||
vpx_usec_timer_start(&ticktimer);
|
||||
}
|
||||
@@ -5328,7 +5093,9 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
|
||||
|
||||
if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame)
|
||||
{
|
||||
generate_psnr_packet(cpi);
|
||||
}
|
||||
|
||||
#if CONFIG_PSNR
|
||||
|
||||
@@ -5344,12 +5111,35 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
if (cpi->b_calculate_psnr)
|
||||
{
|
||||
double y, u, v;
|
||||
double sq_error;
|
||||
double frame_psnr = vp8_calc_psnr(cpi->Source, cm->frame_to_show, &y, &u, &v, &sq_error);
|
||||
double ye,ue,ve;
|
||||
double frame_psnr;
|
||||
YV12_BUFFER_CONFIG *orig = cpi->Source;
|
||||
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
|
||||
YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
|
||||
int y_samples = orig->y_height * orig->y_width ;
|
||||
int uv_samples = orig->uv_height * orig->uv_width ;
|
||||
int t_samples = y_samples + 2 * uv_samples;
|
||||
long long sq_error;
|
||||
|
||||
cpi->total_y += y;
|
||||
cpi->total_u += u;
|
||||
cpi->total_v += v;
|
||||
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
|
||||
recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
|
||||
recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
|
||||
recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
sq_error = ye + ue + ve;
|
||||
|
||||
frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
|
||||
|
||||
cpi->total_y += vp8_mse2psnr(y_samples, 255.0, ye);
|
||||
cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, ue);
|
||||
cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, ve);
|
||||
cpi->total_sq_error += sq_error;
|
||||
cpi->total += frame_psnr;
|
||||
{
|
||||
@@ -5358,18 +5148,36 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
|
||||
|
||||
vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0, IF_RTCD(&cm->rtcd.postproc));
|
||||
vp8_clear_system_state();
|
||||
frame_psnr2 = vp8_calc_psnr(cpi->Source, &cm->post_proc_buffer, &y2, &u2, &v2, &sq_error);
|
||||
frame_ssim2 = vp8_calc_ssim(cpi->Source, &cm->post_proc_buffer, 1, &weight);
|
||||
|
||||
ye = calc_plane_error(orig->y_buffer, orig->y_stride,
|
||||
pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ue = calc_plane_error(orig->u_buffer, orig->uv_stride,
|
||||
pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
ve = calc_plane_error(orig->v_buffer, orig->uv_stride,
|
||||
pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
sq_error = ye + ue + ve;
|
||||
|
||||
frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error);
|
||||
|
||||
cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye);
|
||||
cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue);
|
||||
cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve);
|
||||
cpi->total_sq_error2 += sq_error;
|
||||
cpi->totalp += frame_psnr2;
|
||||
|
||||
frame_ssim2 = vp8_calc_ssim(cpi->Source,
|
||||
&cm->post_proc_buffer, 1, &weight,
|
||||
IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
cpi->summed_quality += frame_ssim2 * weight;
|
||||
cpi->summed_weights += weight;
|
||||
|
||||
cpi->totalp_y += y2;
|
||||
cpi->totalp_u += u2;
|
||||
cpi->totalp_v += v2;
|
||||
cpi->totalp += frame_psnr2;
|
||||
cpi->total_sq_error2 += sq_error;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5565,7 +5373,9 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const
|
||||
|
||||
return Total;
|
||||
}
|
||||
int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
|
||||
|
||||
static int calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
int i, j;
|
||||
int Total = 0;
|
||||
@@ -5593,11 +5403,7 @@ int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, co
|
||||
return Total;
|
||||
}
|
||||
|
||||
int vp8_get_speed(VP8_PTR c)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *) c;
|
||||
return cpi->Speed;
|
||||
}
|
||||
|
||||
int vp8_get_quantizer(VP8_PTR c)
|
||||
{
|
||||
VP8_COMP *cpi = (VP8_COMP *) c;
|
||||
|
@@ -99,6 +99,7 @@ typedef struct
|
||||
double pcnt_inter;
|
||||
double pcnt_motion;
|
||||
double pcnt_second_ref;
|
||||
double pcnt_neutral;
|
||||
double MVr;
|
||||
double mvr_abs;
|
||||
double MVc;
|
||||
@@ -495,11 +496,6 @@ typedef struct
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
int first_pass_done;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
unsigned char *fp_motion_map;
|
||||
unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// Experimental code for lagged and one pass
|
||||
ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS];
|
||||
@@ -603,12 +599,17 @@ typedef struct
|
||||
int encoding_thread_count;
|
||||
|
||||
pthread_t *h_encoding_thread;
|
||||
pthread_t h_filter_thread;
|
||||
|
||||
MB_ROW_COMP *mb_row_ei;
|
||||
ENCODETHREAD_DATA *en_thread_data;
|
||||
LPFTHREAD_DATA lpf_thread_data;
|
||||
|
||||
//events
|
||||
sem_t *h_event_start_encoding;
|
||||
sem_t h_event_end_encoding;
|
||||
sem_t h_event_start_lpf;
|
||||
sem_t h_event_end_lpf;
|
||||
#endif
|
||||
|
||||
TOKENLIST *tplist;
|
||||
@@ -641,8 +642,6 @@ typedef struct
|
||||
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
|
||||
int fixed_divide[512];
|
||||
#endif
|
||||
// Flag to indicate temporal filter method
|
||||
int use_weighted_temporal_filter;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
int count;
|
||||
|
@@ -664,7 +664,8 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
case V_PRED:
|
||||
case H_PRED:
|
||||
case TM_PRED:
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
|
||||
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
|
||||
this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
|
||||
|
@@ -29,89 +29,3 @@ double vp8_mse2psnr(double Samples, double Peak, double Mse)
|
||||
|
||||
return psnr;
|
||||
}
|
||||
|
||||
double vp8_calc_psnr(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *YPsnr, double *UPsnr, double *VPsnr, double *sq_error)
|
||||
{
|
||||
int i, j;
|
||||
int Diff;
|
||||
double frame_psnr;
|
||||
double Total;
|
||||
double grand_total;
|
||||
unsigned char *src = source->y_buffer;
|
||||
unsigned char *dst = dest->y_buffer;
|
||||
|
||||
Total = 0.0;
|
||||
grand_total = 0.0;
|
||||
|
||||
// Loop throught the Y plane raw and reconstruction data summing (square differences)
|
||||
for (i = 0; i < source->y_height; i++)
|
||||
{
|
||||
|
||||
for (j = 0; j < source->y_width; j++)
|
||||
{
|
||||
Diff = (int)(src[j]) - (int)(dst[j]);
|
||||
Total += Diff * Diff;
|
||||
}
|
||||
|
||||
src += source->y_stride;
|
||||
dst += dest->y_stride;
|
||||
}
|
||||
|
||||
// Work out Y PSNR
|
||||
*YPsnr = vp8_mse2psnr(source->y_height * source->y_width, 255.0, Total);
|
||||
grand_total += Total;
|
||||
Total = 0;
|
||||
|
||||
|
||||
// Loop through the U plane
|
||||
src = source->u_buffer;
|
||||
dst = dest->u_buffer;
|
||||
|
||||
for (i = 0; i < source->uv_height; i++)
|
||||
{
|
||||
|
||||
for (j = 0; j < source->uv_width; j++)
|
||||
{
|
||||
Diff = (int)(src[j]) - (int)(dst[j]);
|
||||
Total += Diff * Diff;
|
||||
}
|
||||
|
||||
src += source->uv_stride;
|
||||
dst += dest->uv_stride;
|
||||
}
|
||||
|
||||
// Work out U PSNR
|
||||
*UPsnr = vp8_mse2psnr(source->uv_height * source->uv_width, 255.0, Total);
|
||||
grand_total += Total;
|
||||
Total = 0;
|
||||
|
||||
|
||||
// V PSNR
|
||||
src = source->v_buffer;
|
||||
dst = dest->v_buffer;
|
||||
|
||||
for (i = 0; i < source->uv_height; i++)
|
||||
{
|
||||
|
||||
for (j = 0; j < source->uv_width; j++)
|
||||
{
|
||||
Diff = (int)(src[j]) - (int)(dst[j]);
|
||||
Total += Diff * Diff;
|
||||
}
|
||||
|
||||
src += source->uv_stride;
|
||||
dst += dest->uv_stride;
|
||||
}
|
||||
|
||||
// Work out UV PSNR
|
||||
*VPsnr = vp8_mse2psnr(source->uv_height * source->uv_width, 255.0, Total);
|
||||
grand_total += Total;
|
||||
Total = 0;
|
||||
|
||||
// Work out total PSNR
|
||||
frame_psnr = vp8_mse2psnr(source->y_height * source->y_width * 3 / 2 , 255.0, grand_total);
|
||||
|
||||
*sq_error = 1.0 * grand_total;
|
||||
|
||||
return frame_psnr;
|
||||
}
|
||||
|
@@ -13,6 +13,5 @@
|
||||
#define __INC_PSNR_H
|
||||
|
||||
extern double vp8_mse2psnr(double Samples, double Peak, double Mse);
|
||||
extern double vp8_calc_psnr(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, double *YPsnr, double *UPsnr, double *VPsnr, double *sq_error);
|
||||
|
||||
#endif
|
||||
|
@@ -90,7 +90,7 @@ const int vp8_bits_per_mb[2][QINDEX_RANGE] =
|
||||
}
|
||||
};
|
||||
|
||||
const int vp8_kf_boost_qadjustment[QINDEX_RANGE] =
|
||||
static const int kf_boost_qadjustment[QINDEX_RANGE] =
|
||||
{
|
||||
128, 129, 130, 131, 132, 133, 134, 135,
|
||||
136, 137, 138, 139, 140, 141, 142, 143,
|
||||
@@ -154,7 +154,7 @@ const int vp8_gf_boost_qadjustment[QINDEX_RANGE] =
|
||||
};
|
||||
*/
|
||||
|
||||
const int vp8_kf_gf_boost_qlimits[QINDEX_RANGE] =
|
||||
static const int kf_gf_boost_qlimits[QINDEX_RANGE] =
|
||||
{
|
||||
150, 155, 160, 165, 170, 175, 180, 185,
|
||||
190, 195, 200, 205, 210, 215, 220, 225,
|
||||
@@ -175,14 +175,14 @@ const int vp8_kf_gf_boost_qlimits[QINDEX_RANGE] =
|
||||
};
|
||||
|
||||
// % adjustment to target kf size based on seperation from previous frame
|
||||
const int vp8_kf_boost_seperationt_adjustment[16] =
|
||||
static const int kf_boost_seperation_adjustment[16] =
|
||||
{
|
||||
30, 40, 50, 55, 60, 65, 70, 75,
|
||||
80, 85, 90, 95, 100, 100, 100, 100,
|
||||
};
|
||||
|
||||
|
||||
const int vp8_gf_adjust_table[101] =
|
||||
static const int gf_adjust_table[101] =
|
||||
{
|
||||
100,
|
||||
115, 130, 145, 160, 175, 190, 200, 210, 220, 230,
|
||||
@@ -197,13 +197,13 @@ const int vp8_gf_adjust_table[101] =
|
||||
400, 400, 400, 400, 400, 400, 400, 400, 400, 400,
|
||||
};
|
||||
|
||||
const int vp8_gf_intra_useage_adjustment[20] =
|
||||
static const int gf_intra_usage_adjustment[20] =
|
||||
{
|
||||
125, 120, 115, 110, 105, 100, 95, 85, 80, 75,
|
||||
70, 65, 60, 55, 50, 50, 50, 50, 50, 50,
|
||||
};
|
||||
|
||||
const int vp8_gf_interval_table[101] =
|
||||
static const int gf_interval_table[101] =
|
||||
{
|
||||
7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
@@ -353,7 +353,7 @@ void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
|
||||
kf_boost = (int)(2 * cpi->output_frame_rate - 16);
|
||||
|
||||
// adjustment up based on q
|
||||
kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100;
|
||||
kf_boost = kf_boost * kf_boost_qadjustment[cpi->ni_av_qi] / 100;
|
||||
|
||||
// frame separation adjustment ( down)
|
||||
if (cpi->frames_since_key < cpi->output_frame_rate / 2)
|
||||
@@ -488,10 +488,10 @@ static void calc_gf_params(VP8_COMP *cpi)
|
||||
Boost = GFQ_ADJUSTMENT;
|
||||
|
||||
// Adjust based upon most recently measure intra useage
|
||||
Boost = Boost * vp8_gf_intra_useage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100;
|
||||
Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100;
|
||||
|
||||
// Adjust gf boost based upon GF usage since last GF
|
||||
Boost = Boost * vp8_gf_adjust_table[gf_frame_useage] / 100;
|
||||
Boost = Boost * gf_adjust_table[gf_frame_useage] / 100;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -503,8 +503,8 @@ static void calc_gf_params(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
// Apply an upper limit based on Q for 1 pass encodes
|
||||
if (Boost > vp8_kf_gf_boost_qlimits[Q] && (cpi->pass == 0))
|
||||
Boost = vp8_kf_gf_boost_qlimits[Q];
|
||||
if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0))
|
||||
Boost = kf_gf_boost_qlimits[Q];
|
||||
|
||||
// Apply lower limits to boost.
|
||||
else if (Boost < 110)
|
||||
@@ -539,8 +539,8 @@ static void calc_gf_params(VP8_COMP *cpi)
|
||||
if (cpi->last_boost >= 1500)
|
||||
cpi->frames_till_gf_update_due ++;
|
||||
|
||||
if (vp8_gf_interval_table[gf_frame_useage] > cpi->frames_till_gf_update_due)
|
||||
cpi->frames_till_gf_update_due = vp8_gf_interval_table[gf_frame_useage];
|
||||
if (gf_interval_table[gf_frame_useage] > cpi->frames_till_gf_update_due)
|
||||
cpi->frames_till_gf_update_due = gf_interval_table[gf_frame_useage];
|
||||
|
||||
if (cpi->frames_till_gf_update_due > cpi->max_gf_interval)
|
||||
cpi->frames_till_gf_update_due = cpi->max_gf_interval;
|
||||
@@ -594,17 +594,17 @@ void vp8_calc_iframe_target_size(VP8_COMP *cpi)
|
||||
// between key frames.
|
||||
|
||||
// Adjust boost based upon ambient Q
|
||||
Boost = vp8_kf_boost_qadjustment[Q];
|
||||
Boost = kf_boost_qadjustment[Q];
|
||||
|
||||
// Make the Key frame boost less if the seperation from the previous key frame is small
|
||||
if (cpi->frames_since_key < 16)
|
||||
Boost = Boost * vp8_kf_boost_seperationt_adjustment[cpi->frames_since_key] / 100;
|
||||
Boost = Boost * kf_boost_seperation_adjustment[cpi->frames_since_key] / 100;
|
||||
else
|
||||
Boost = Boost * vp8_kf_boost_seperationt_adjustment[15] / 100;
|
||||
Boost = Boost * kf_boost_seperation_adjustment[15] / 100;
|
||||
|
||||
// Apply limits on boost
|
||||
if (Boost > vp8_kf_gf_boost_qlimits[Q])
|
||||
Boost = vp8_kf_gf_boost_qlimits[Q];
|
||||
if (Boost > kf_gf_boost_qlimits[Q])
|
||||
Boost = kf_gf_boost_qlimits[Q];
|
||||
else if (Boost < 120)
|
||||
Boost = 120;
|
||||
}
|
||||
@@ -842,7 +842,8 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
{
|
||||
int one_percent_bits = 1 + cpi->oxcf.optimal_buffer_level / 100;
|
||||
|
||||
if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level))
|
||||
if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) ||
|
||||
(cpi->bits_off_target < cpi->oxcf.optimal_buffer_level))
|
||||
{
|
||||
int percent_low = 0;
|
||||
|
||||
@@ -851,9 +852,12 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
// If we are are below the optimal buffer fullness level and adherence
|
||||
// to buffering contraints is important to the end useage then adjust
|
||||
// the per frame target.
|
||||
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level < cpi->oxcf.optimal_buffer_level))
|
||||
if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
|
||||
(cpi->buffer_level < cpi->oxcf.optimal_buffer_level))
|
||||
{
|
||||
percent_low = (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / one_percent_bits;
|
||||
percent_low =
|
||||
(cpi->oxcf.optimal_buffer_level - cpi->buffer_level) /
|
||||
one_percent_bits;
|
||||
|
||||
if (percent_low > 100)
|
||||
percent_low = 100;
|
||||
@@ -864,7 +868,8 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
else if (cpi->bits_off_target < 0)
|
||||
{
|
||||
// Adjust per frame data target downwards to compensate.
|
||||
percent_low = (int)(100 * -cpi->bits_off_target / (cpi->total_byte_count * 8));
|
||||
percent_low = (int)(100 * -cpi->bits_off_target /
|
||||
(cpi->total_byte_count * 8));
|
||||
|
||||
if (percent_low > 100)
|
||||
percent_low = 100;
|
||||
@@ -873,39 +878,60 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
// lower the target bandwidth for this frame.
|
||||
cpi->this_frame_target = (cpi->this_frame_target * (100 - (percent_low / 2))) / 100;
|
||||
cpi->this_frame_target =
|
||||
(cpi->this_frame_target * (100 - (percent_low / 2))) / 100;
|
||||
|
||||
// Are we using allowing control of active_worst_allowed_q according to buffer level.
|
||||
// Are we using allowing control of active_worst_allowed_q
|
||||
// according to buffer level.
|
||||
if (cpi->auto_worst_q)
|
||||
{
|
||||
int critical_buffer_level;
|
||||
|
||||
// For streaming applications the most important factor is cpi->buffer_level as this takes
|
||||
// into account the specified short term buffering constraints. However, hitting the long
|
||||
// term clip data rate target is also important.
|
||||
// For streaming applications the most important factor is
|
||||
// cpi->buffer_level as this takes into account the
|
||||
// specified short term buffering constraints. However,
|
||||
// hitting the long term clip data rate target is also
|
||||
// important.
|
||||
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
|
||||
{
|
||||
// Take the smaller of cpi->buffer_level and cpi->bits_off_target
|
||||
critical_buffer_level = (cpi->buffer_level < cpi->bits_off_target) ? cpi->buffer_level : cpi->bits_off_target;
|
||||
// Take the smaller of cpi->buffer_level and
|
||||
// cpi->bits_off_target
|
||||
critical_buffer_level =
|
||||
(cpi->buffer_level < cpi->bits_off_target)
|
||||
? cpi->buffer_level : cpi->bits_off_target;
|
||||
}
|
||||
// For local file playback short term buffering contraints are less of an issue
|
||||
// For local file playback short term buffering contraints
|
||||
// are less of an issue
|
||||
else
|
||||
{
|
||||
// Consider only how we are doing for the clip as a whole
|
||||
// Consider only how we are doing for the clip as a
|
||||
// whole
|
||||
critical_buffer_level = cpi->bits_off_target;
|
||||
}
|
||||
|
||||
// Set the active worst quality based upon the selected buffer fullness number.
|
||||
// Set the active worst quality based upon the selected
|
||||
// buffer fullness number.
|
||||
if (critical_buffer_level < cpi->oxcf.optimal_buffer_level)
|
||||
{
|
||||
if (critical_buffer_level > (cpi->oxcf.optimal_buffer_level / 4))
|
||||
if ( critical_buffer_level >
|
||||
(cpi->oxcf.optimal_buffer_level >> 2) )
|
||||
{
|
||||
int qadjustment_range = cpi->worst_quality - cpi->ni_av_qi;
|
||||
int above_base = (critical_buffer_level - (cpi->oxcf.optimal_buffer_level / 4));
|
||||
INT64 qadjustment_range =
|
||||
cpi->worst_quality - cpi->ni_av_qi;
|
||||
INT64 above_base =
|
||||
(critical_buffer_level -
|
||||
(cpi->oxcf.optimal_buffer_level >> 2));
|
||||
|
||||
// Step active worst quality down from cpi->ni_av_qi when (critical_buffer_level == cpi->optimal_buffer_level)
|
||||
// to cpi->oxcf.worst_allowed_q when (critical_buffer_level == cpi->optimal_buffer_level/4)
|
||||
cpi->active_worst_quality = cpi->worst_quality - ((qadjustment_range * above_base) / (cpi->oxcf.optimal_buffer_level * 3 / 4));
|
||||
// Step active worst quality down from
|
||||
// cpi->ni_av_qi when (critical_buffer_level ==
|
||||
// cpi->optimal_buffer_level) to
|
||||
// cpi->worst_quality when
|
||||
// (critical_buffer_level ==
|
||||
// cpi->optimal_buffer_level >> 2)
|
||||
cpi->active_worst_quality =
|
||||
cpi->worst_quality -
|
||||
((qadjustment_range * above_base) /
|
||||
(cpi->oxcf.optimal_buffer_level*3>>2));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -965,6 +991,15 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
// Set the active worst quality
|
||||
cpi->active_worst_quality = cpi->worst_quality;
|
||||
}
|
||||
|
||||
// Special trap for constrained quality mode
|
||||
// "active_worst_quality" may never drop below cq level
|
||||
// for any frame type.
|
||||
if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
|
||||
cpi->active_worst_quality < cpi->cq_target_quality)
|
||||
{
|
||||
cpi->active_worst_quality = cpi->cq_target_quality;
|
||||
}
|
||||
}
|
||||
|
||||
// Test to see if we have to drop a frame
|
||||
|
@@ -53,7 +53,7 @@ extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
|
||||
|
||||
|
||||
const int vp8_auto_speed_thresh[17] =
|
||||
static const int auto_speed_thresh[17] =
|
||||
{
|
||||
1000,
|
||||
200,
|
||||
@@ -353,7 +353,7 @@ void vp8_auto_select_speed(VP8_COMP *cpi)
|
||||
}
|
||||
}
|
||||
|
||||
if (milliseconds_for_compress * 100 > cpi->avg_encode_time * vp8_auto_speed_thresh[cpi->Speed])
|
||||
if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
|
||||
{
|
||||
cpi->Speed -= 1;
|
||||
cpi->avg_pick_mode_time = 0;
|
||||
@@ -745,7 +745,8 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
|
||||
{
|
||||
x->e_mbd.mode_info_context->mbmi.mode = mode;
|
||||
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
|
||||
macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb));
|
||||
rate = ratey + x->mbmode_cost[x->e_mbd.frame_type]
|
||||
@@ -999,13 +1000,6 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
|
||||
return distortion;
|
||||
}
|
||||
|
||||
unsigned char vp8_mbsplit_offset2[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
|
||||
static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
|
||||
|
||||
@@ -1033,8 +1027,8 @@ typedef struct
|
||||
} BEST_SEG_INFO;
|
||||
|
||||
|
||||
void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
|
||||
unsigned int segmentation)
|
||||
static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
BEST_SEG_INFO *bsi, unsigned int segmentation)
|
||||
{
|
||||
int i;
|
||||
int const *labels;
|
||||
@@ -1152,7 +1146,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi,
|
||||
int sadpb = x->sadperbit4;
|
||||
|
||||
// find first label
|
||||
n = vp8_mbsplit_offset2[segmentation][i];
|
||||
n = vp8_mbsplit_offset[segmentation][i];
|
||||
|
||||
c = &x->block[n];
|
||||
e = &x->e_mbd.block[n];
|
||||
@@ -1331,16 +1325,16 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
{
|
||||
/* for now, we will keep the original segmentation order
|
||||
when in best quality mode */
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
|
||||
}
|
||||
else
|
||||
{
|
||||
int sr;
|
||||
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
|
||||
|
||||
if (bsi.segment_rd < best_rd)
|
||||
{
|
||||
@@ -1379,7 +1373,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
sr = MAXF((abs(bsi.sv_mvp[1].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[1].col - bsi.sv_mvp[3].col))>>3);
|
||||
vp8_cal_step_param(sr, &bsi.sv_istep[1]);
|
||||
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
|
||||
}
|
||||
|
||||
/* block 16X8 */
|
||||
@@ -1390,7 +1384,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
sr = MAXF((abs(bsi.sv_mvp[2].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[2].col - bsi.sv_mvp[3].col))>>3);
|
||||
vp8_cal_step_param(sr, &bsi.sv_istep[1]);
|
||||
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
|
||||
}
|
||||
|
||||
/* If 8x8 is better than 16x8/8x16, then do 4x4 search */
|
||||
@@ -1398,7 +1392,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
|
||||
{
|
||||
bsi.mvp = &bsi.sv_mvp[0];
|
||||
vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
|
||||
rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
|
||||
}
|
||||
|
||||
/* restore UMV window */
|
||||
@@ -1431,7 +1425,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
|
||||
{
|
||||
int j;
|
||||
|
||||
j = vp8_mbsplit_offset2[bsi.segment_num][i];
|
||||
j = vp8_mbsplit_offset[bsi.segment_num][i];
|
||||
|
||||
x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
|
||||
x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
|
||||
@@ -1968,7 +1962,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
else
|
||||
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
|
||||
}
|
||||
else if (vp8_ref_frame_order[mode_index] == SPLITMV)
|
||||
else if (vp8_mode_order[mode_index] == SPLITMV)
|
||||
cpi->zbin_mode_boost = 0;
|
||||
else
|
||||
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
|
||||
@@ -2038,7 +2032,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
|
||||
case H_PRED:
|
||||
case TM_PRED:
|
||||
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
|
||||
vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
|
||||
RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
|
||||
(&x->e_mbd);
|
||||
macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
|
||||
rate2 += rate_y;
|
||||
distortion2 += distortion;
|
||||
|
@@ -11,298 +11,13 @@
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "math.h"
|
||||
#include "onyx_int.h"
|
||||
|
||||
#define C1 (float)(64 * 64 * 0.01*255*0.01*255)
|
||||
#define C2 (float)(64 * 64 * 0.03*255*0.03*255)
|
||||
|
||||
static int width_y;
|
||||
static int height_y;
|
||||
static int height_uv;
|
||||
static int width_uv;
|
||||
static int stride_uv;
|
||||
static int stride;
|
||||
static int lumimask;
|
||||
static int luminance;
|
||||
static double plane_summed_weights = 0;
|
||||
|
||||
static short img12_sum_block[8*4096*4096*2] ;
|
||||
|
||||
static short img1_sum[8*4096*2];
|
||||
static short img2_sum[8*4096*2];
|
||||
static int img1_sq_sum[8*4096*2];
|
||||
static int img2_sq_sum[8*4096*2];
|
||||
static int img12_mul_sum[8*4096*2];
|
||||
|
||||
|
||||
double vp8_similarity
|
||||
(
|
||||
int mu_x,
|
||||
int mu_y,
|
||||
int pre_mu_x2,
|
||||
int pre_mu_y2,
|
||||
int pre_mu_xy2
|
||||
)
|
||||
{
|
||||
int mu_x2, mu_y2, mu_xy, theta_x2, theta_y2, theta_xy;
|
||||
|
||||
mu_x2 = mu_x * mu_x;
|
||||
mu_y2 = mu_y * mu_y;
|
||||
mu_xy = mu_x * mu_y;
|
||||
|
||||
theta_x2 = 64 * pre_mu_x2 - mu_x2;
|
||||
theta_y2 = 64 * pre_mu_y2 - mu_y2;
|
||||
theta_xy = 64 * pre_mu_xy2 - mu_xy;
|
||||
|
||||
return (2 * mu_xy + C1) * (2 * theta_xy + C2) / ((mu_x2 + mu_y2 + C1) * (theta_x2 + theta_y2 + C2));
|
||||
}
|
||||
|
||||
double vp8_ssim
|
||||
(
|
||||
const unsigned char *img1,
|
||||
const unsigned char *img2,
|
||||
int stride_img1,
|
||||
int stride_img2,
|
||||
int width,
|
||||
int height
|
||||
)
|
||||
{
|
||||
int x, y, x2, y2, img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block, temp;
|
||||
|
||||
double plane_quality, weight, mean;
|
||||
|
||||
short *img1_sum_ptr1, *img1_sum_ptr2;
|
||||
short *img2_sum_ptr1, *img2_sum_ptr2;
|
||||
int *img1_sq_sum_ptr1, *img1_sq_sum_ptr2;
|
||||
int *img2_sq_sum_ptr1, *img2_sq_sum_ptr2;
|
||||
int *img12_mul_sum_ptr1, *img12_mul_sum_ptr2;
|
||||
|
||||
plane_quality = 0;
|
||||
|
||||
if (lumimask)
|
||||
plane_summed_weights = 0.0f;
|
||||
else
|
||||
plane_summed_weights = (height - 7) * (width - 7);
|
||||
|
||||
//some prologue for the main loop
|
||||
temp = 8 * width;
|
||||
|
||||
img1_sum_ptr1 = img1_sum + temp;
|
||||
img2_sum_ptr1 = img2_sum + temp;
|
||||
img1_sq_sum_ptr1 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr1 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr1 = img12_mul_sum + temp;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
img1_sum[x] = img1[x];
|
||||
img2_sum[x] = img2[x];
|
||||
img1_sq_sum[x] = img1[x] * img1[x];
|
||||
img2_sq_sum[x] = img2[x] * img2[x];
|
||||
img12_mul_sum[x] = img1[x] * img2[x];
|
||||
|
||||
img1_sum_ptr1[x] = 0;
|
||||
img2_sum_ptr1[x] = 0;
|
||||
img1_sq_sum_ptr1[x] = 0;
|
||||
img2_sq_sum_ptr1[x] = 0;
|
||||
img12_mul_sum_ptr1[x] = 0;
|
||||
}
|
||||
|
||||
//the main loop
|
||||
for (y = 1; y < height; y++)
|
||||
{
|
||||
img1 += stride_img1;
|
||||
img2 += stride_img2;
|
||||
|
||||
temp = (y - 1) % 9 * width;
|
||||
|
||||
img1_sum_ptr1 = img1_sum + temp;
|
||||
img2_sum_ptr1 = img2_sum + temp;
|
||||
img1_sq_sum_ptr1 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr1 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr1 = img12_mul_sum + temp;
|
||||
|
||||
temp = y % 9 * width;
|
||||
|
||||
img1_sum_ptr2 = img1_sum + temp;
|
||||
img2_sum_ptr2 = img2_sum + temp;
|
||||
img1_sq_sum_ptr2 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr2 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr2 = img12_mul_sum + temp;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
img1_sum_ptr2[x] = img1_sum_ptr1[x] + img1[x];
|
||||
img2_sum_ptr2[x] = img2_sum_ptr1[x] + img2[x];
|
||||
img1_sq_sum_ptr2[x] = img1_sq_sum_ptr1[x] + img1[x] * img1[x];
|
||||
img2_sq_sum_ptr2[x] = img2_sq_sum_ptr1[x] + img2[x] * img2[x];
|
||||
img12_mul_sum_ptr2[x] = img12_mul_sum_ptr1[x] + img1[x] * img2[x];
|
||||
}
|
||||
|
||||
if (y > 6)
|
||||
{
|
||||
//calculate the sum of the last 8 lines by subtracting the total sum of 8 lines back from the present sum
|
||||
temp = (y + 1) % 9 * width;
|
||||
|
||||
img1_sum_ptr1 = img1_sum + temp;
|
||||
img2_sum_ptr1 = img2_sum + temp;
|
||||
img1_sq_sum_ptr1 = img1_sq_sum + temp;
|
||||
img2_sq_sum_ptr1 = img2_sq_sum + temp;
|
||||
img12_mul_sum_ptr1 = img12_mul_sum + temp;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
img1_sum_ptr1[x] = img1_sum_ptr2[x] - img1_sum_ptr1[x];
|
||||
img2_sum_ptr1[x] = img2_sum_ptr2[x] - img2_sum_ptr1[x];
|
||||
img1_sq_sum_ptr1[x] = img1_sq_sum_ptr2[x] - img1_sq_sum_ptr1[x];
|
||||
img2_sq_sum_ptr1[x] = img2_sq_sum_ptr2[x] - img2_sq_sum_ptr1[x];
|
||||
img12_mul_sum_ptr1[x] = img12_mul_sum_ptr2[x] - img12_mul_sum_ptr1[x];
|
||||
}
|
||||
|
||||
//here we calculate the sum over the 8x8 block of pixels
|
||||
//this is done by sliding a window across the column sums for the last 8 lines
|
||||
//each time adding the new column sum, and subtracting the one which fell out of the window
|
||||
img1_block = 0;
|
||||
img2_block = 0;
|
||||
img1_sq_block = 0;
|
||||
img2_sq_block = 0;
|
||||
img12_mul_block = 0;
|
||||
|
||||
//prologue, and calculation of simularity measure from the first 8 column sums
|
||||
for (x = 0; x < 8; x++)
|
||||
{
|
||||
img1_block += img1_sum_ptr1[x];
|
||||
img2_block += img2_sum_ptr1[x];
|
||||
img1_sq_block += img1_sq_sum_ptr1[x];
|
||||
img2_sq_block += img2_sq_sum_ptr1[x];
|
||||
img12_mul_block += img12_mul_sum_ptr1[x];
|
||||
}
|
||||
|
||||
if (lumimask)
|
||||
{
|
||||
y2 = y - 7;
|
||||
x2 = 0;
|
||||
|
||||
if (luminance)
|
||||
{
|
||||
mean = (img2_block + img1_block) / 128.0f;
|
||||
|
||||
if (!(y2 % 2 || x2 % 2))
|
||||
*(img12_sum_block + y2 / 2 * width_uv + x2 / 2) = img2_block + img1_block;
|
||||
}
|
||||
else
|
||||
{
|
||||
mean = *(img12_sum_block + y2 * width_uv + x2);
|
||||
mean += *(img12_sum_block + y2 * width_uv + x2 + 4);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2 + 4);
|
||||
|
||||
mean /= 512.0f;
|
||||
}
|
||||
|
||||
weight = mean < 40 ? 0.0f :
|
||||
(mean < 50 ? (mean - 40.0f) / 10.0f : 1.0f);
|
||||
plane_summed_weights += weight;
|
||||
|
||||
plane_quality += weight * vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
}
|
||||
else
|
||||
plane_quality += vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
|
||||
//and for the rest
|
||||
for (x = 8; x < width; x++)
|
||||
{
|
||||
img1_block = img1_block + img1_sum_ptr1[x] - img1_sum_ptr1[x - 8];
|
||||
img2_block = img2_block + img2_sum_ptr1[x] - img2_sum_ptr1[x - 8];
|
||||
img1_sq_block = img1_sq_block + img1_sq_sum_ptr1[x] - img1_sq_sum_ptr1[x - 8];
|
||||
img2_sq_block = img2_sq_block + img2_sq_sum_ptr1[x] - img2_sq_sum_ptr1[x - 8];
|
||||
img12_mul_block = img12_mul_block + img12_mul_sum_ptr1[x] - img12_mul_sum_ptr1[x - 8];
|
||||
|
||||
if (lumimask)
|
||||
{
|
||||
y2 = y - 7;
|
||||
x2 = x - 7;
|
||||
|
||||
if (luminance)
|
||||
{
|
||||
mean = (img2_block + img1_block) / 128.0f;
|
||||
|
||||
if (!(y2 % 2 || x2 % 2))
|
||||
*(img12_sum_block + y2 / 2 * width_uv + x2 / 2) = img2_block + img1_block;
|
||||
}
|
||||
else
|
||||
{
|
||||
mean = *(img12_sum_block + y2 * width_uv + x2);
|
||||
mean += *(img12_sum_block + y2 * width_uv + x2 + 4);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2);
|
||||
mean += *(img12_sum_block + (y2 + 4) * width_uv + x2 + 4);
|
||||
|
||||
mean /= 512.0f;
|
||||
}
|
||||
|
||||
weight = mean < 40 ? 0.0f :
|
||||
(mean < 50 ? (mean - 40.0f) / 10.0f : 1.0f);
|
||||
plane_summed_weights += weight;
|
||||
|
||||
plane_quality += weight * vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
}
|
||||
else
|
||||
plane_quality += vp8_similarity(img1_block, img2_block, img1_sq_block, img2_sq_block, img12_mul_block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (plane_summed_weights == 0)
|
||||
return 1.0f;
|
||||
else
|
||||
return plane_quality / plane_summed_weights;
|
||||
}
|
||||
|
||||
double vp8_calc_ssim
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight
|
||||
)
|
||||
{
|
||||
double a, b, c;
|
||||
double frame_weight;
|
||||
double ssimv;
|
||||
|
||||
width_y = source->y_width;
|
||||
height_y = source->y_height;
|
||||
height_uv = source->uv_height;
|
||||
width_uv = source->uv_width;
|
||||
stride_uv = dest->uv_stride;
|
||||
stride = dest->y_stride;
|
||||
|
||||
lumimask = lumamask;
|
||||
|
||||
luminance = 1;
|
||||
a = vp8_ssim(source->y_buffer, dest->y_buffer,
|
||||
source->y_stride, dest->y_stride, source->y_width, source->y_height);
|
||||
luminance = 0;
|
||||
|
||||
frame_weight = plane_summed_weights / ((width_y - 7) * (height_y - 7));
|
||||
|
||||
if (frame_weight == 0)
|
||||
a = b = c = 1.0f;
|
||||
else
|
||||
{
|
||||
b = vp8_ssim(source->u_buffer, dest->u_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width, source->uv_height);
|
||||
|
||||
c = vp8_ssim(source->v_buffer, dest->v_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width, source->uv_height);
|
||||
}
|
||||
|
||||
ssimv = a * .8 + .1 * (b + c);
|
||||
|
||||
*weight = frame_weight;
|
||||
|
||||
return ssimv;
|
||||
}
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#define IF_RTCD(x) (x)
|
||||
#else
|
||||
#define IF_RTCD(x) NULL
|
||||
#endif
|
||||
// Google version of SSIM
|
||||
// SSIM
|
||||
#define KERNEL 3
|
||||
@@ -520,3 +235,174 @@ double vp8_calc_ssimg
|
||||
*ssim_v /= uvsize;
|
||||
return ssim_all;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Gather the raw statistics of a 16x16 pixel window.
 *
 *   s, sp : top-left pixel of the first window and its row stride
 *   r, rp : top-left pixel of the second window and its row stride
 *   sum_s, sum_r       : receive the sum of the pixel values
 *   sum_sq_s, sum_sq_r : receive the sum of the squared pixel values
 *   sum_sxr            : receives the sum of the products s*r
 *
 * Every total is added to, never reset, so the caller has to zero the
 * five accumulators before the first call.
 */
void ssim_parms_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    int rows_left = 16;

    while (rows_left-- > 0)
    {
        int col = 0;

        while (col < 16)
        {
            const int src = s[col];
            const int ref = r[col];

            *sum_s    += src;
            *sum_r    += ref;
            *sum_sq_s += src * src;
            *sum_sq_r += ref * ref;
            *sum_sxr  += src * ref;
            ++col;
        }

        // step both windows down one row
        s += sp;
        r += rp;
    }
}
|
||||
/*
 * Gather the raw statistics of an 8x8 pixel window.
 *
 *   s, sp : top-left pixel of the first window and its row stride
 *   r, rp : top-left pixel of the second window and its row stride
 *   sum_s, sum_r       : receive the sum of the pixel values
 *   sum_sq_s, sum_sq_r : receive the sum of the squared pixel values
 *   sum_sxr            : receives the sum of the products s*r
 *
 * Every total is added to, never reset, so the caller has to zero the
 * five accumulators before the first call.
 */
void ssim_parms_8x8_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    int rows_left = 8;

    while (rows_left-- > 0)
    {
        int col = 0;

        while (col < 8)
        {
            const int src = s[col];
            const int ref = r[col];

            *sum_s    += src;
            *sum_r    += ref;
            *sum_sq_s += src * src;
            *sum_sq_r += ref * ref;
            *sum_sxr  += src * ref;
            ++col;
        }

        // step both windows down one row
        s += sp;
        r += rp;
    }
}
|
||||
|
||||
// SSIM stabilising constants, pre-multiplied for a 256-pixel (16x16) window:
//   c1 = 256^2 * (.01*255)^2,  c2 = 256^2 * (.03*255)^2
static const long long c1 = 426148;
static const long long c2 = 3835331;

/*
 * Structural similarity of two pixel windows, computed from their raw sums.
 *
 *   sum_s, sum_r       : sum of the pixel values of each window
 *   sum_sq_s, sum_sq_r : sum of the squared pixel values of each window
 *   sum_sxr            : sum of the per-pixel products s*r
 *   count              : number of pixels in the window
 *
 * Numerator and denominator are both scaled by count^2, which is why the
 * stabilising constants have to carry the same factor.  c1/c2 above are
 * stored for count == 256, so they are rescaled here for any other window
 * size; for count == 256 the rescale is exact and leaves them unchanged.
 *
 * All sums are widened to long long before being multiplied: with a 32-bit
 * unsigned long the product 2*sum_s*sum_r of a 16x16 window would wrap.
 *
 * Returns the SSIM value (1.0 for identical windows).
 */
static double similarity
(
    unsigned long sum_s,
    unsigned long sum_r,
    unsigned long sum_sq_s,
    unsigned long sum_sq_r,
    unsigned long sum_sxr,
    int count
)
{
    const long long n    = count;
    const long long s    = (long long)sum_s;
    const long long r    = (long long)sum_r;
    const long long sq_s = (long long)sum_sq_s;
    const long long sq_r = (long long)sum_sq_r;
    const long long sxr  = (long long)sum_sxr;

    // constants scaled from the stored 256^2 factor to count^2
    const long long k1 = c1 * n * n / 65536;
    const long long k2 = c2 * n * n / 65536;

    const long long ssim_n = (2 * s * r + k1) * (2 * n * sxr - 2 * s * r + k2);

    const long long ssim_d = (s * s + r * r + k1) *
                             (n * sq_s - s * s + n * sq_r - r * r + k2);

    return ssim_n * 1.0 / ssim_d;
}
|
||||
|
||||
static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
|
||||
}
|
||||
static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||
rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
|
||||
}
|
||||
|
||||
// TODO: (jbb) tried to scale this function such that we may be able to use it
|
||||
// for distortion metric in mode selection code ( provided we do a reconstruction)
|
||||
long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
|
||||
double ssim3;
|
||||
long long ssim_n;
|
||||
long long ssim_d;
|
||||
|
||||
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
|
||||
ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
|
||||
|
||||
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
|
||||
(256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
|
||||
|
||||
ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
|
||||
return (long)( 256*ssim3 * ssim3 );
|
||||
}
|
||||
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
|
||||
// such that the window regions overlap block boundaries to penalize blocking
|
||||
// artifacts.
|
||||
|
||||
double vp8_ssim2
|
||||
(
|
||||
unsigned char *img1,
|
||||
unsigned char *img2,
|
||||
int stride_img1,
|
||||
int stride_img2,
|
||||
int width,
|
||||
int height,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd
|
||||
)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
double ssim_total=0;
|
||||
|
||||
// we can sample points as frequently as we like start with 1 per 8x8
|
||||
for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
|
||||
{
|
||||
for(j=0; j < width; j+=8 )
|
||||
{
|
||||
ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
|
||||
}
|
||||
}
|
||||
ssim_total /= (width/8 * height /8);
|
||||
return ssim_total;
|
||||
|
||||
}
|
||||
double vp8_calc_ssim
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd
|
||||
)
|
||||
{
|
||||
double a, b, c;
|
||||
double ssimv;
|
||||
|
||||
a = vp8_ssim2(source->y_buffer, dest->y_buffer,
|
||||
source->y_stride, dest->y_stride, source->y_width,
|
||||
source->y_height, rtcd);
|
||||
|
||||
b = vp8_ssim2(source->u_buffer, dest->u_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width,
|
||||
source->uv_height, rtcd);
|
||||
|
||||
c = vp8_ssim2(source->v_buffer, dest->v_buffer,
|
||||
source->uv_stride, dest->uv_stride, source->uv_width,
|
||||
source->uv_height, rtcd);
|
||||
|
||||
ssimv = a * .8 + .1 * (b + c);
|
||||
|
||||
*weight = 1;
|
||||
|
||||
return ssimv;
|
||||
}
|
||||
|
@@ -287,8 +287,7 @@ static void vp8_temporal_filter_iterate_c
|
||||
int byte;
|
||||
int frame;
|
||||
int mb_col, mb_row;
|
||||
unsigned int filter_weight[MAX_LAG_BUFFERS];
|
||||
unsigned char *mm_ptr = cpi->fp_motion_map;
|
||||
unsigned int filter_weight;
|
||||
int mb_cols = cpi->common.mb_cols;
|
||||
int mb_rows = cpi->common.mb_rows;
|
||||
int MBs = cpi->common.MBs;
|
||||
@@ -306,13 +305,6 @@ static void vp8_temporal_filter_iterate_c
|
||||
unsigned char *u_buffer = mbd->pre.u_buffer;
|
||||
unsigned char *v_buffer = mbd->pre.v_buffer;
|
||||
|
||||
if (!cpi->use_weighted_temporal_filter)
|
||||
{
|
||||
// Temporal filtering is unweighted
|
||||
for (frame = 0; frame < frame_count; frame++)
|
||||
filter_weight[frame] = 1;
|
||||
}
|
||||
|
||||
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||
{
|
||||
#if ALT_REF_MC_ENABLED
|
||||
@@ -338,34 +330,9 @@ static void vp8_temporal_filter_iterate_c
|
||||
+ (VP8BORDERINPIXELS - 19);
|
||||
#endif
|
||||
|
||||
// Read & process macroblock weights from motion map
|
||||
if (cpi->use_weighted_temporal_filter)
|
||||
{
|
||||
weight_cap = 2;
|
||||
|
||||
for (frame = alt_ref_index-1; frame >= 0; frame--)
|
||||
{
|
||||
w = *(mm_ptr + (frame+1)*MBs);
|
||||
filter_weight[frame] = w < weight_cap ? w : weight_cap;
|
||||
weight_cap = w;
|
||||
}
|
||||
|
||||
filter_weight[alt_ref_index] = 2;
|
||||
|
||||
weight_cap = 2;
|
||||
|
||||
for (frame = alt_ref_index+1; frame < frame_count; frame++)
|
||||
{
|
||||
w = *(mm_ptr + frame*MBs);
|
||||
filter_weight[frame] = w < weight_cap ? w : weight_cap;
|
||||
weight_cap = w;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (frame = 0; frame < frame_count; frame++)
|
||||
{
|
||||
int err;
|
||||
int err = 0;
|
||||
|
||||
if (cpi->frames[frame] == NULL)
|
||||
continue;
|
||||
@@ -374,28 +341,25 @@ static void vp8_temporal_filter_iterate_c
|
||||
mbd->block[0].bmi.mv.as_mv.col = 0;
|
||||
|
||||
#if ALT_REF_MC_ENABLED
|
||||
//if (filter_weight[frame] == 0)
|
||||
{
|
||||
#define THRESH_LOW 10000
|
||||
#define THRESH_HIGH 20000
|
||||
|
||||
// Correlation has been lost try MC
|
||||
err = vp8_temporal_filter_find_matching_mb_c
|
||||
(cpi,
|
||||
cpi->frames[alt_ref_index],
|
||||
cpi->frames[frame],
|
||||
mb_y_offset,
|
||||
THRESH_LOW);
|
||||
// Find best match in this frame by MC
|
||||
err = vp8_temporal_filter_find_matching_mb_c
|
||||
(cpi,
|
||||
cpi->frames[alt_ref_index],
|
||||
cpi->frames[frame],
|
||||
mb_y_offset,
|
||||
THRESH_LOW);
|
||||
|
||||
if (filter_weight[frame] < 2)
|
||||
{
|
||||
// Set weight depending on error
|
||||
filter_weight[frame] = err<THRESH_LOW
|
||||
? 2 : err<THRESH_HIGH ? 1 : 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (filter_weight[frame] != 0)
|
||||
// Assign higher weight to matching MB if it's error
|
||||
// score is lower. If not applying MC default behavior
|
||||
// is to weight all MBs equal.
|
||||
filter_weight = err<THRESH_LOW
|
||||
? 2 : err<THRESH_HIGH ? 1 : 0;
|
||||
|
||||
if (filter_weight != 0)
|
||||
{
|
||||
// Construct the predictors
|
||||
vp8_temporal_filter_predictors_mb_c
|
||||
@@ -415,7 +379,7 @@ static void vp8_temporal_filter_iterate_c
|
||||
predictor,
|
||||
16,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
filter_weight,
|
||||
accumulator,
|
||||
count);
|
||||
|
||||
@@ -425,7 +389,7 @@ static void vp8_temporal_filter_iterate_c
|
||||
predictor + 256,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
filter_weight,
|
||||
accumulator + 256,
|
||||
count + 256);
|
||||
|
||||
@@ -435,7 +399,7 @@ static void vp8_temporal_filter_iterate_c
|
||||
predictor + 320,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
filter_weight,
|
||||
accumulator + 320,
|
||||
count + 320);
|
||||
}
|
||||
@@ -491,7 +455,6 @@ static void vp8_temporal_filter_iterate_c
|
||||
byte += stride - 8;
|
||||
}
|
||||
|
||||
mm_ptr++;
|
||||
mb_y_offset += 16;
|
||||
mb_uv_offset += 8;
|
||||
}
|
||||
|
@@ -26,9 +26,9 @@ _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef
|
||||
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
|
||||
void vp8_fix_contexts(MACROBLOCKD *x);
|
||||
|
||||
TOKENVALUE vp8_dct_value_tokens[DCT_MAX_VALUE*2];
|
||||
static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE*2];
|
||||
const TOKENVALUE *vp8_dct_value_tokens_ptr;
|
||||
int vp8_dct_value_cost[DCT_MAX_VALUE*2];
|
||||
static int dct_value_cost[DCT_MAX_VALUE*2];
|
||||
const int *vp8_dct_value_cost_ptr;
|
||||
#if 0
|
||||
int skip_true_count = 0;
|
||||
@@ -37,7 +37,7 @@ int skip_false_count = 0;
|
||||
static void fill_value_tokens()
|
||||
{
|
||||
|
||||
TOKENVALUE *const t = vp8_dct_value_tokens + DCT_MAX_VALUE;
|
||||
TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE;
|
||||
vp8_extra_bit_struct *const e = vp8_extra_bits;
|
||||
|
||||
int i = -DCT_MAX_VALUE;
|
||||
@@ -81,7 +81,7 @@ static void fill_value_tokens()
|
||||
cost += vp8_treed_cost(p->tree, p->prob, extra >> 1, Length);
|
||||
|
||||
cost += vp8_cost_bit(vp8_prob_half, extra & 1); /* sign */
|
||||
vp8_dct_value_cost[i + DCT_MAX_VALUE] = cost;
|
||||
dct_value_cost[i + DCT_MAX_VALUE] = cost;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -89,8 +89,8 @@ static void fill_value_tokens()
|
||||
}
|
||||
while (++i < DCT_MAX_VALUE);
|
||||
|
||||
vp8_dct_value_tokens_ptr = vp8_dct_value_tokens + DCT_MAX_VALUE;
|
||||
vp8_dct_value_cost_ptr = vp8_dct_value_cost + DCT_MAX_VALUE;
|
||||
vp8_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE;
|
||||
vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
|
||||
}
|
||||
|
||||
static void tokenize2nd_order_b
|
||||
|
@@ -85,6 +85,19 @@
|
||||
unsigned int *sse \
|
||||
);
|
||||
|
||||
#define prototype_ssimpf(sym) \
|
||||
void (sym) \
|
||||
( \
|
||||
unsigned char *s, \
|
||||
int sp, \
|
||||
unsigned char *r, \
|
||||
int rp, \
|
||||
unsigned long *sum_s, \
|
||||
unsigned long *sum_r, \
|
||||
unsigned long *sum_sq_s, \
|
||||
unsigned long *sum_sq_r, \
|
||||
unsigned long *sum_sxr \
|
||||
);
|
||||
|
||||
#define prototype_getmbss(sym) unsigned int (sym)(const short *)
|
||||
|
||||
@@ -306,6 +319,15 @@ extern prototype_variance2(vp8_variance_get16x16var);
|
||||
#endif
|
||||
extern prototype_sad(vp8_variance_get4x4sse_cs);
|
||||
|
||||
#ifndef vp8_ssimpf
|
||||
#define vp8_ssimpf ssim_parms_c
|
||||
#endif
|
||||
extern prototype_ssimpf(vp8_ssimpf)
|
||||
|
||||
#ifndef vp8_ssimpf_8x8
|
||||
#define vp8_ssimpf_8x8 ssim_parms_8x8_c
|
||||
#endif
|
||||
extern prototype_ssimpf(vp8_ssimpf_8x8)
|
||||
|
||||
typedef prototype_sad(*vp8_sad_fn_t);
|
||||
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
|
||||
@@ -315,6 +337,10 @@ typedef prototype_variance(*vp8_variance_fn_t);
|
||||
typedef prototype_variance2(*vp8_variance2_fn_t);
|
||||
typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t);
|
||||
typedef prototype_getmbss(*vp8_getmbss_fn_t);
|
||||
|
||||
typedef prototype_ssimpf(*vp8_ssimpf_fn_t)
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_sad_fn_t sad4x4;
|
||||
@@ -365,6 +391,11 @@ typedef struct
|
||||
vp8_sad_multi_d_fn_t sad8x8x4d;
|
||||
vp8_sad_multi_d_fn_t sad4x4x4d;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
vp8_ssimpf_fn_t ssimpf_8x8;
|
||||
vp8_ssimpf_fn_t ssimpf;
|
||||
#endif
|
||||
|
||||
} vp8_variance_rtcd_vtable_t;
|
||||
|
||||
typedef struct
|
||||
@@ -378,6 +409,7 @@ typedef struct
|
||||
vp8_sad_multi_fn_t sdx3f;
|
||||
vp8_sad_multi1_fn_t sdx8f;
|
||||
vp8_sad_multi_d_fn_t sdx4df;
|
||||
|
||||
} vp8_variance_fn_ptr_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@@ -10,33 +10,8 @@
|
||||
|
||||
|
||||
#include "variance.h"
|
||||
#include "vp8/common/filter.h"
|
||||
|
||||
const int vp8_six_tap[8][6] =
|
||||
{
|
||||
{ 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter
|
||||
{ 0, -1, 12, 123, -6, 0 }
|
||||
};
|
||||
|
||||
|
||||
const int VP8_FILTER_WEIGHT = 128;
|
||||
const int VP8_FILTER_SHIFT = 7;
|
||||
const int vp8_bilinear_taps[8][2] =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
unsigned int vp8_get_mb_ss_c
|
||||
(
|
||||
@@ -56,7 +31,7 @@ unsigned int vp8_get_mb_ss_c
|
||||
}
|
||||
|
||||
|
||||
void vp8_variance(
|
||||
static void variance(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -98,7 +73,7 @@ vp8_get8x8var_c
|
||||
)
|
||||
{
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
|
||||
return (*SSE - (((*Sum) * (*Sum)) >> 6));
|
||||
}
|
||||
|
||||
@@ -114,7 +89,7 @@ vp8_get16x16var_c
|
||||
)
|
||||
{
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
|
||||
return (*SSE - (((*Sum) * (*Sum)) >> 8));
|
||||
|
||||
}
|
||||
@@ -132,7 +107,7 @@ unsigned int vp8_variance16x16_c(
|
||||
int avg;
|
||||
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 8));
|
||||
}
|
||||
@@ -148,7 +123,7 @@ unsigned int vp8_variance8x16_c(
|
||||
int avg;
|
||||
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 7));
|
||||
}
|
||||
@@ -164,7 +139,7 @@ unsigned int vp8_variance16x8_c(
|
||||
int avg;
|
||||
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 7));
|
||||
}
|
||||
@@ -181,7 +156,7 @@ unsigned int vp8_variance8x8_c(
|
||||
int avg;
|
||||
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 6));
|
||||
}
|
||||
@@ -197,7 +172,7 @@ unsigned int vp8_variance4x4_c(
|
||||
int avg;
|
||||
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 4));
|
||||
}
|
||||
@@ -213,7 +188,7 @@ unsigned int vp8_mse16x16_c(
|
||||
unsigned int var;
|
||||
int avg;
|
||||
|
||||
vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
|
||||
*sse = var;
|
||||
return var;
|
||||
}
|
||||
@@ -247,7 +222,7 @@ unsigned int vp8_mse16x16_c(
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8e_filter_block2d_bil_first_pass
|
||||
static void var_filter_block2d_bil_first_pass
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
@@ -255,7 +230,7 @@ void vp8e_filter_block2d_bil_first_pass
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
@@ -305,7 +280,7 @@ void vp8e_filter_block2d_bil_first_pass
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8e_filter_block2d_bil_second_pass
|
||||
static void var_filter_block2d_bil_second_pass
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
@@ -313,7 +288,7 @@ void vp8e_filter_block2d_bil_second_pass
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
@@ -338,52 +313,6 @@ void vp8e_filter_block2d_bil_second_pass
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : 2-D filters an 8x8 input block by applying a 2-tap
|
||||
* bi-linear filter horizontally followed by a 2-tap
|
||||
* bi-linear filter vertically on the result.
|
||||
*
|
||||
* SPECIAL NOTES : The intermediate horizontally filtered block must produce
|
||||
* 1 more point than the input block in each column. This
|
||||
* is to ensure that the 2-tap filter has one extra data-point
|
||||
* at the top of each column so filter taps do not extend
|
||||
* beyond data. Thus the output of the first stage filter
|
||||
* is an 8x9 (hx_v) block.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8e_filter_block2d_bil
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int *HFilter,
|
||||
int *VFilter
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[20*16]; // Temp data bufffer used in filtering
|
||||
|
||||
// First filter 1-D horizontally...
|
||||
vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
// then 1-D vertically...
|
||||
vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter);
|
||||
}
|
||||
|
||||
|
||||
|
||||
unsigned int vp8_sub_pixel_variance4x4_c
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -396,17 +325,17 @@ unsigned int vp8_sub_pixel_variance4x4_c
|
||||
)
|
||||
{
|
||||
unsigned char temp2[20*16];
|
||||
const int *HFilter, *VFilter;
|
||||
const short *HFilter, *VFilter;
|
||||
unsigned short FData3[5*4]; // Temp data bufffer used in filtering
|
||||
|
||||
HFilter = vp8_bilinear_taps[xoffset];
|
||||
VFilter = vp8_bilinear_taps[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
// First filter 1d Horizontal
|
||||
vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
|
||||
|
||||
// Now filter vertically
|
||||
vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
|
||||
|
||||
return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
@@ -425,13 +354,13 @@ unsigned int vp8_sub_pixel_variance8x8_c
|
||||
{
|
||||
unsigned short FData3[9*8]; // Temp data bufffer used in filtering
|
||||
unsigned char temp2[20*16];
|
||||
const int *HFilter, *VFilter;
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_taps[xoffset];
|
||||
VFilter = vp8_bilinear_taps[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
|
||||
|
||||
return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
@@ -449,13 +378,13 @@ unsigned int vp8_sub_pixel_variance16x16_c
|
||||
{
|
||||
unsigned short FData3[17*16]; // Temp data bufffer used in filtering
|
||||
unsigned char temp2[20*16];
|
||||
const int *HFilter, *VFilter;
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_taps[xoffset];
|
||||
VFilter = vp8_bilinear_taps[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
|
||||
vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
|
||||
|
||||
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
@@ -525,13 +454,13 @@ unsigned int vp8_sub_pixel_variance16x8_c
|
||||
{
|
||||
unsigned short FData3[16*9]; // Temp data bufffer used in filtering
|
||||
unsigned char temp2[20*16];
|
||||
const int *HFilter, *VFilter;
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_taps[xoffset];
|
||||
VFilter = vp8_bilinear_taps[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
|
||||
|
||||
return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
@@ -549,15 +478,15 @@ unsigned int vp8_sub_pixel_variance8x16_c
|
||||
{
|
||||
unsigned short FData3[9*16]; // Temp data bufffer used in filtering
|
||||
unsigned char temp2[20*16];
|
||||
const int *HFilter, *VFilter;
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
|
||||
HFilter = vp8_bilinear_taps[xoffset];
|
||||
VFilter = vp8_bilinear_taps[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
|
||||
vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
|
||||
vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
|
||||
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
|
||||
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
|
||||
|
||||
return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
|
||||
}
|
||||
|
@@ -186,7 +186,7 @@ sym(vp8_sad16x16x8_sse4):
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -224,7 +224,7 @@ sym(vp8_sad16x8x8_sse4):
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -262,7 +262,7 @@ sym(vp8_sad8x8x8_sse4):
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -303,7 +303,7 @@ sym(vp8_sad8x16x8_sse4):
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
@@ -339,7 +339,7 @@ sym(vp8_sad4x4x8_sse4):
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
|
215
vp8/encoder/x86/ssim_opt.asm
Normal file
215
vp8/encoder/x86/ssim_opt.asm
Normal file
@@ -0,0 +1,215 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
|
||||
%macro TABULATE_SSIM 0
|
||||
paddusw xmm15, xmm3 ; sum_s
|
||||
paddusw xmm14, xmm4 ; sum_r
|
||||
movdqa xmm1, xmm3
|
||||
pmaddwd xmm1, xmm1
|
||||
paddq xmm13, xmm1 ; sum_sq_s
|
||||
movdqa xmm2, xmm4
|
||||
pmaddwd xmm2, xmm2
|
||||
paddq xmm12, xmm2 ; sum_sq_r
|
||||
pmaddwd xmm3, xmm4
|
||||
paddq xmm11, xmm3 ; sum_sxr
|
||||
%endmacro
|
||||
|
||||
; Sum the four dword lanes of register %1 into its low qword (expects xmm0 == 0)
|
||||
%macro SUM_ACROSS_Q 1
|
||||
movdqa xmm2,%1
|
||||
punpckldq %1,xmm0
|
||||
punpckhdq xmm2,xmm0
|
||||
paddq %1,xmm2
|
||||
movdqa xmm2,%1
|
||||
punpcklqdq %1,xmm0
|
||||
punpckhqdq xmm2,xmm0
|
||||
paddq %1,xmm2
|
||||
%endmacro
|
||||
|
||||
; Sum across the register %1 starting with 16-bit words (expects xmm0 == 0)
|
||||
%macro SUM_ACROSS_W 1
|
||||
movdqa xmm1, %1
|
||||
punpcklwd %1,xmm0
|
||||
punpckhwd xmm1,xmm0
|
||||
paddd %1, xmm1
|
||||
SUM_ACROSS_Q %1
|
||||
%endmacro
|
||||
;void vp8_ssim_parms_16x16_sse3(
|
||||
; unsigned char *s,
|
||||
; int sp,
|
||||
; unsigned char *r,
|
||||
; int rp,
|
||||
; unsigned long *sum_s,
|
||||
; unsigned long *sum_r,
|
||||
; unsigned long *sum_sq_s,
|
||||
; unsigned long *sum_sq_r,
|
||||
; unsigned long *sum_sxr);
|
||||
;
|
||||
; TODO: Use parm passing through structure, probably don't need the pxors
|
||||
; ( calling app will initialize to 0 ) could easily fit everything in sse2
|
||||
; without too much hassle, and can probably do better estimates with psadbw
|
||||
; or pavgb At this point this is just meant to be first pass for calculating
|
||||
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
|
||||
; in mode selection code.
|
||||
global sym(vp8_ssim_parms_16x16_sse3)
|
||||
sym(vp8_ssim_parms_16x16_sse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;s
|
||||
mov rcx, arg(1) ;sp
|
||||
mov rdi, arg(2) ;r
|
||||
mov rax, arg(3) ;rp
|
||||
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm15,xmm15 ;sum_s
|
||||
pxor xmm14,xmm14 ;sum_r
|
||||
pxor xmm13,xmm13 ;sum_sq_s
|
||||
pxor xmm12,xmm12 ;sum_sq_r
|
||||
pxor xmm11,xmm11 ;sum_sxr
|
||||
|
||||
mov rdx, 16 ;row counter
|
||||
NextRow:
|
||||
|
||||
;grab source and reference pixels
|
||||
movdqu xmm5, [rsi]
|
||||
movdqu xmm6, [rdi]
|
||||
movdqa xmm3, xmm5
|
||||
movdqa xmm4, xmm6
|
||||
punpckhbw xmm3, xmm0 ; high_s
|
||||
punpckhbw xmm4, xmm0 ; high_r
|
||||
|
||||
TABULATE_SSIM
|
||||
|
||||
movdqa xmm3, xmm5
|
||||
movdqa xmm4, xmm6
|
||||
punpcklbw xmm3, xmm0 ; low_s
|
||||
punpcklbw xmm4, xmm0 ; low_r
|
||||
|
||||
TABULATE_SSIM
|
||||
|
||||
add rsi, rcx ; next s row
|
||||
add rdi, rax ; next r row
|
||||
|
||||
dec rdx ; counter
|
||||
jnz NextRow
|
||||
|
||||
SUM_ACROSS_W xmm15
|
||||
SUM_ACROSS_W xmm14
|
||||
SUM_ACROSS_Q xmm13
|
||||
SUM_ACROSS_Q xmm12
|
||||
SUM_ACROSS_Q xmm11
|
||||
|
||||
mov rdi,arg(4)
|
||||
movq [rdi], xmm15;
|
||||
mov rdi,arg(5)
|
||||
movq [rdi], xmm14;
|
||||
mov rdi,arg(6)
|
||||
movq [rdi], xmm13;
|
||||
mov rdi,arg(7)
|
||||
movq [rdi], xmm12;
|
||||
mov rdi,arg(8)
|
||||
movq [rdi], xmm11;
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_ssim_parms_8x8_sse3(
|
||||
; unsigned char *s,
|
||||
; int sp,
|
||||
; unsigned char *r,
|
||||
; int rp,
|
||||
; unsigned long *sum_s,
|
||||
; unsigned long *sum_r,
|
||||
; unsigned long *sum_sq_s,
|
||||
; unsigned long *sum_sq_r,
|
||||
; unsigned long *sum_sxr);
|
||||
;
|
||||
; TODO: Use parm passing through structure, probably don't need the pxors
|
||||
; ( calling app will initialize to 0 ) could easily fit everything in sse2
|
||||
; without too much hassle, and can probably do better estimates with psadbw
|
||||
; or pavgb At this point this is just meant to be first pass for calculating
|
||||
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
|
||||
; in mode selection code.
|
||||
global sym(vp8_ssim_parms_8x8_sse3)
|
||||
sym(vp8_ssim_parms_8x8_sse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;s
|
||||
mov rcx, arg(1) ;sp
|
||||
mov rdi, arg(2) ;r
|
||||
mov rax, arg(3) ;rp
|
||||
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm15,xmm15 ;sum_s
|
||||
pxor xmm14,xmm14 ;sum_r
|
||||
pxor xmm13,xmm13 ;sum_sq_s
|
||||
pxor xmm12,xmm12 ;sum_sq_r
|
||||
pxor xmm11,xmm11 ;sum_sxr
|
||||
|
||||
mov rdx, 8 ;row counter
|
||||
NextRow2:
|
||||
|
||||
;grab source and reference pixels
|
||||
movq xmm5, [rsi]
|
||||
movq xmm6, [rdi]
|
||||
|
||||
movdqa xmm3, xmm5
|
||||
movdqa xmm4, xmm6
|
||||
punpcklbw xmm3, xmm0 ; low_s
|
||||
punpcklbw xmm4, xmm0 ; low_r
|
||||
|
||||
TABULATE_SSIM
|
||||
|
||||
add rsi, rcx ; next s row
|
||||
add rdi, rax ; next r row
|
||||
|
||||
dec rdx ; counter
|
||||
jnz NextRow2
|
||||
|
||||
SUM_ACROSS_W xmm15
|
||||
SUM_ACROSS_W xmm14
|
||||
SUM_ACROSS_Q xmm13
|
||||
SUM_ACROSS_Q xmm12
|
||||
SUM_ACROSS_Q xmm11
|
||||
|
||||
mov rdi,arg(4)
|
||||
movq [rdi], xmm15;
|
||||
mov rdi,arg(5)
|
||||
movq [rdi], xmm14;
|
||||
mov rdi,arg(6)
|
||||
movq [rdi], xmm13;
|
||||
mov rdi,arg(7)
|
||||
movq [rdi], xmm12;
|
||||
mov rdi,arg(8)
|
||||
movq [rdi], xmm11;
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
@@ -627,6 +627,10 @@ filter_block2d_bil_var_sse2_loop:
|
||||
|
||||
filter_block2d_bil_var_sse2_sp_only:
|
||||
movsxd rdx, dword ptr arg(6) ; yoffset
|
||||
|
||||
cmp rdx, 0 ; skip all if both xoffset=0 and yoffset=0
|
||||
je filter_block2d_bil_var_sse2_full_pixel
|
||||
|
||||
shl rdx, 5
|
||||
lea rdx, [rdx + rcx] ; VFilter
|
||||
|
||||
@@ -671,6 +675,35 @@ filter_block2d_bil_sp_only_loop:
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_sse2_full_pixel:
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
filter_block2d_bil_full_pixel_loop:
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
punpcklbw xmm1, xmm0 ;
|
||||
|
||||
movq xmm2, QWORD PTR [rdi] ;
|
||||
punpcklbw xmm2, xmm0 ;
|
||||
|
||||
psubw xmm1, xmm2 ;
|
||||
paddw xmm6, xmm1 ;
|
||||
|
||||
pmaddwd xmm1, xmm1 ;
|
||||
paddd xmm7, xmm1 ;
|
||||
|
||||
lea rsi, [rsi + rax] ;ref_pixels_per_line
|
||||
lea rdi, [rdi + rbx] ;src_pixels_per_line
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz filter_block2d_bil_full_pixel_loop ;
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_sse2_fp_only:
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
@@ -757,7 +790,7 @@ filter_block2d_bil_variance:
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
;void vp8_half_horiz_vert_variance8x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
@@ -767,8 +800,8 @@ filter_block2d_bil_variance:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||
global sym(vp8_half_horiz_vert_variance8x_h_sse2)
|
||||
sym(vp8_half_horiz_vert_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
@@ -802,7 +835,7 @@ sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||
add rsi, r8
|
||||
%endif
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_1:
|
||||
vp8_half_horiz_vert_variance8x_h_1:
|
||||
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
movq xmm2, QWORD PTR [rsi+1] ;
|
||||
@@ -830,7 +863,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||
%endif
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_vert_variance16x_h_1 ;
|
||||
jnz vp8_half_horiz_vert_variance8x_h_1 ;
|
||||
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
@@ -877,8 +910,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_vert_variance16x_h_sse2
|
||||
;void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
@@ -888,8 +920,124 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_vert_variance16x_h_sse2):
|
||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ; error accumulator
|
||||
pxor xmm7, xmm7 ; sse eaccumulator
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
movdqu xmm5, XMMWORD PTR [rsi]
|
||||
movdqu xmm3, XMMWORD PTR [rsi+1]
|
||||
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_1:
|
||||
movdqu xmm1, XMMWORD PTR [rsi] ;
|
||||
movdqu xmm2, XMMWORD PTR [rsi+1] ;
|
||||
pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
|
||||
|
||||
pavgb xmm5, xmm1 ; xmm = vertical average of the above
|
||||
|
||||
movdqa xmm4, xmm5
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = words of above
|
||||
punpckhbw xmm4, xmm0
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = words of above
|
||||
psubw xmm5, xmm3 ; xmm5 -= xmm3
|
||||
|
||||
movq xmm3, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm3, xmm0
|
||||
psubw xmm4, xmm3
|
||||
|
||||
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||
paddw xmm6, xmm4
|
||||
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||
pmaddwd xmm4, xmm4
|
||||
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||
paddd xmm7, xmm4
|
||||
|
||||
movdqa xmm5, xmm1 ; save xmm1 for use on the next row
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
lea rdi, [rdi + rdx]
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_vert_variance16x_h_1 ;
|
||||
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(5) ;[Sum]
|
||||
mov rdi, arg(6) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_vert_variance8x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_vert_variance8x_h_sse2)
|
||||
sym(vp8_half_vert_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
@@ -912,7 +1060,7 @@ sym(vp8_half_vert_variance16x_h_sse2):
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
vp8_half_vert_variance16x_h_1:
|
||||
vp8_half_vert_variance8x_h_1:
|
||||
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
||||
movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
|
||||
|
||||
@@ -936,7 +1084,7 @@ vp8_half_vert_variance16x_h_1:
|
||||
%endif
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_vert_variance16x_h_1 ;
|
||||
jnz vp8_half_vert_variance8x_h_1 ;
|
||||
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
@@ -983,8 +1131,7 @@ vp8_half_vert_variance16x_h_1:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_horiz_variance16x_h_sse2
|
||||
;void vp8_half_vert_variance16x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
@@ -994,8 +1141,116 @@ vp8_half_vert_variance16x_h_1:
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_variance16x_h_sse2):
|
||||
global sym(vp8_half_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_vert_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ; error accumulator
|
||||
pxor xmm7, xmm7 ; sse eaccumulator
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
movdqu xmm5, XMMWORD PTR [rsi]
|
||||
lea rsi, [rsi + rax ]
|
||||
pxor xmm0, xmm0
|
||||
|
||||
vp8_half_vert_variance16x_h_1:
|
||||
movdqu xmm3, XMMWORD PTR [rsi]
|
||||
|
||||
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
|
||||
movdqa xmm4, xmm5
|
||||
punpcklbw xmm5, xmm0
|
||||
punpckhbw xmm4, xmm0
|
||||
|
||||
movq xmm2, QWORD PTR [rdi]
|
||||
punpcklbw xmm2, xmm0
|
||||
psubw xmm5, xmm2
|
||||
movq xmm2, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm2, xmm0
|
||||
psubw xmm4, xmm2
|
||||
|
||||
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||
paddw xmm6, xmm4
|
||||
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||
pmaddwd xmm4, xmm4
|
||||
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||
paddd xmm7, xmm4
|
||||
|
||||
movdqa xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
lea rdi, [rdi + rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jnz vp8_half_vert_variance16x_h_1
|
||||
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(5) ;[Sum]
|
||||
mov rdi, arg(6) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_horiz_variance8x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance8x_h_sse2)
|
||||
sym(vp8_half_horiz_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
@@ -1017,7 +1272,7 @@ sym(vp8_half_horiz_variance16x_h_sse2):
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
vp8_half_horiz_variance16x16_1:
|
||||
vp8_half_horiz_variance8x_h_1:
|
||||
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
||||
movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
|
||||
|
||||
@@ -1040,7 +1295,7 @@ vp8_half_horiz_variance16x16_1:
|
||||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_variance16x16_1 ;
|
||||
jnz vp8_half_horiz_variance8x_h_1 ;
|
||||
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
@@ -1087,6 +1342,109 @@ vp8_half_horiz_variance16x16_1:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_half_horiz_variance16x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ; error accumulator
|
||||
pxor xmm7, xmm7 ; sse eaccumulator
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
vp8_half_horiz_variance16x_h_1:
|
||||
movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
|
||||
movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
|
||||
|
||||
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
|
||||
movdqa xmm1, xmm5
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = words of above
|
||||
punpckhbw xmm1, xmm0
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = words of above
|
||||
movq xmm2, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm2, xmm0
|
||||
|
||||
psubw xmm5, xmm3 ; xmm5 -= xmm3
|
||||
psubw xmm1, xmm2
|
||||
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||
paddw xmm6, xmm1
|
||||
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||
pmaddwd xmm1, xmm1
|
||||
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||
paddd xmm7, xmm1
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
lea rdi, [rdi + rdx]
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_variance16x_h_1 ;
|
||||
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(5) ;[Sum]
|
||||
mov rdi, arg(6) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64};
|
||||
|
348
vp8/encoder/x86/variance_impl_ssse3.asm
Normal file
348
vp8/encoder/x86/variance_impl_ssse3.asm
Normal file
@@ -0,0 +1,348 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%define xmm_filter_shift 7
|
||||
|
||||
|
||||
;void vp8_filter_block2d_bil_var_ssse3
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int xoffset,
|
||||
; int yoffset,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;
|
||||
;)
|
||||
;Note: The filter coefficient at offset=0 is 128. Since the second register
|
||||
;for Pmaddubsw is signed bytes, we must calculate zero offset separately.
|
||||
global sym(vp8_filter_block2d_bil_var_ssse3)
|
||||
sym(vp8_filter_block2d_bil_var_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 9
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6
|
||||
pxor xmm7, xmm7
|
||||
|
||||
lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)]
|
||||
movsxd rax, dword ptr arg(5) ; xoffset
|
||||
|
||||
cmp rax, 0 ; skip first_pass filter if xoffset=0
|
||||
je filter_block2d_bil_var_ssse3_sp_only
|
||||
|
||||
shl rax, 4 ; point to filter coeff with xoffset
|
||||
lea rax, [rax + rcx] ; HFilter
|
||||
|
||||
movsxd rdx, dword ptr arg(6) ; yoffset
|
||||
|
||||
cmp rdx, 0 ; skip second_pass filter if yoffset=0
|
||||
je filter_block2d_bil_var_ssse3_fp_only
|
||||
|
||||
shl rdx, 4
|
||||
lea rdx, [rdx + rcx] ; VFilter
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi+1]
|
||||
movdqa xmm2, xmm0
|
||||
|
||||
punpcklbw xmm0, xmm1
|
||||
punpckhbw xmm2, xmm1
|
||||
pmaddubsw xmm0, [rax]
|
||||
pmaddubsw xmm2, [rax]
|
||||
|
||||
paddw xmm0, [GLOBAL(xmm_bi_rd)]
|
||||
paddw xmm2, [GLOBAL(xmm_bi_rd)]
|
||||
psraw xmm0, xmm_filter_shift
|
||||
psraw xmm2, xmm_filter_shift
|
||||
|
||||
packuswb xmm0, xmm2
|
||||
|
||||
movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line
|
||||
lea rsi, [rsi + rbx]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r9, dword ptr arg(3) ;src_pixels_per_line
|
||||
%endif
|
||||
|
||||
filter_block2d_bil_var_ssse3_loop:
|
||||
movdqu xmm1, XMMWORD PTR [rsi]
|
||||
movdqu xmm2, XMMWORD PTR [rsi+1]
|
||||
movdqa xmm3, xmm1
|
||||
|
||||
punpcklbw xmm1, xmm2
|
||||
punpckhbw xmm3, xmm2
|
||||
pmaddubsw xmm1, [rax]
|
||||
pmaddubsw xmm3, [rax]
|
||||
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)]
|
||||
paddw xmm3, [GLOBAL(xmm_bi_rd)]
|
||||
psraw xmm1, xmm_filter_shift
|
||||
psraw xmm3, xmm_filter_shift
|
||||
packuswb xmm1, xmm3
|
||||
|
||||
movdqa xmm2, xmm0
|
||||
movdqa xmm0, xmm1
|
||||
movdqa xmm3, xmm2
|
||||
|
||||
punpcklbw xmm2, xmm1
|
||||
punpckhbw xmm3, xmm1
|
||||
pmaddubsw xmm2, [rdx]
|
||||
pmaddubsw xmm3, [rdx]
|
||||
|
||||
paddw xmm2, [GLOBAL(xmm_bi_rd)]
|
||||
paddw xmm3, [GLOBAL(xmm_bi_rd)]
|
||||
psraw xmm2, xmm_filter_shift
|
||||
psraw xmm3, xmm_filter_shift
|
||||
|
||||
movq xmm1, QWORD PTR [rdi]
|
||||
pxor xmm4, xmm4
|
||||
punpcklbw xmm1, xmm4
|
||||
movq xmm5, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm5, xmm4
|
||||
|
||||
psubw xmm2, xmm1
|
||||
psubw xmm3, xmm5
|
||||
paddw xmm6, xmm2
|
||||
paddw xmm6, xmm3
|
||||
pmaddwd xmm2, xmm2
|
||||
pmaddwd xmm3, xmm3
|
||||
paddd xmm7, xmm2
|
||||
paddd xmm7, xmm3
|
||||
|
||||
lea rsi, [rsi + rbx] ;ref_pixels_per_line
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, dword ptr arg(3) ;src_pixels_per_line
|
||||
%else
|
||||
lea rdi, [rdi + r9]
|
||||
%endif
|
||||
|
||||
sub rcx, 1
|
||||
jnz filter_block2d_bil_var_ssse3_loop
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_ssse3_sp_only:
|
||||
movsxd rdx, dword ptr arg(6) ; yoffset
|
||||
|
||||
cmp rdx, 0 ; Both xoffset =0 and yoffset=0
|
||||
je filter_block2d_bil_var_ssse3_full_pixel
|
||||
|
||||
shl rdx, 4
|
||||
lea rdx, [rdx + rcx] ; VFilter
|
||||
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
|
||||
movdqu xmm1, XMMWORD PTR [rsi]
|
||||
movdqa xmm0, xmm1
|
||||
|
||||
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
|
||||
lea rsi, [rsi + rax]
|
||||
|
||||
filter_block2d_bil_sp_only_loop:
|
||||
movdqu xmm3, XMMWORD PTR [rsi]
|
||||
movdqa xmm2, xmm1
|
||||
movdqa xmm0, xmm3
|
||||
|
||||
punpcklbw xmm1, xmm3
|
||||
punpckhbw xmm2, xmm3
|
||||
pmaddubsw xmm1, [rdx]
|
||||
pmaddubsw xmm2, [rdx]
|
||||
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)]
|
||||
paddw xmm2, [GLOBAL(xmm_bi_rd)]
|
||||
psraw xmm1, xmm_filter_shift
|
||||
psraw xmm2, xmm_filter_shift
|
||||
|
||||
movq xmm3, QWORD PTR [rdi]
|
||||
pxor xmm4, xmm4
|
||||
punpcklbw xmm3, xmm4
|
||||
movq xmm5, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm5, xmm4
|
||||
|
||||
psubw xmm1, xmm3
|
||||
psubw xmm2, xmm5
|
||||
paddw xmm6, xmm1
|
||||
paddw xmm6, xmm2
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm7, xmm1
|
||||
paddd xmm7, xmm2
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
lea rsi, [rsi + rax] ;ref_pixels_per_line
|
||||
lea rdi, [rdi + rbx] ;src_pixels_per_line
|
||||
|
||||
sub rcx, 1
|
||||
jnz filter_block2d_bil_sp_only_loop
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_ssse3_full_pixel:
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
|
||||
pxor xmm0, xmm0
|
||||
|
||||
filter_block2d_bil_full_pixel_loop:
|
||||
movq xmm1, QWORD PTR [rsi]
|
||||
punpcklbw xmm1, xmm0
|
||||
movq xmm2, QWORD PTR [rsi+8]
|
||||
punpcklbw xmm2, xmm0
|
||||
|
||||
movq xmm3, QWORD PTR [rdi]
|
||||
punpcklbw xmm3, xmm0
|
||||
movq xmm4, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm4, xmm0
|
||||
|
||||
psubw xmm1, xmm3
|
||||
psubw xmm2, xmm4
|
||||
paddw xmm6, xmm1
|
||||
paddw xmm6, xmm2
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm7, xmm1
|
||||
paddd xmm7, xmm2
|
||||
|
||||
lea rsi, [rsi + rax] ;ref_pixels_per_line
|
||||
lea rdi, [rdi + rbx] ;src_pixels_per_line
|
||||
sub rcx, 1
|
||||
jnz filter_block2d_bil_full_pixel_loop
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_var_ssse3_fp_only:
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0
|
||||
movsxd rbx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
filter_block2d_bil_fp_only_loop:
|
||||
movdqu xmm1, XMMWORD PTR [rsi]
|
||||
movdqu xmm2, XMMWORD PTR [rsi+1]
|
||||
movdqa xmm3, xmm1
|
||||
|
||||
punpcklbw xmm1, xmm2
|
||||
punpckhbw xmm3, xmm2
|
||||
pmaddubsw xmm1, [rax]
|
||||
pmaddubsw xmm3, [rax]
|
||||
|
||||
paddw xmm1, [GLOBAL(xmm_bi_rd)]
|
||||
paddw xmm3, [GLOBAL(xmm_bi_rd)]
|
||||
psraw xmm1, xmm_filter_shift
|
||||
psraw xmm3, xmm_filter_shift
|
||||
|
||||
movq xmm2, XMMWORD PTR [rdi]
|
||||
pxor xmm4, xmm4
|
||||
punpcklbw xmm2, xmm4
|
||||
movq xmm5, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm5, xmm4
|
||||
|
||||
psubw xmm1, xmm2
|
||||
psubw xmm3, xmm5
|
||||
paddw xmm6, xmm1
|
||||
paddw xmm6, xmm3
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm3, xmm3
|
||||
paddd xmm7, xmm1
|
||||
paddd xmm7, xmm3
|
||||
|
||||
lea rsi, [rsi + rdx]
|
||||
lea rdi, [rdi + rbx] ;src_pixels_per_line
|
||||
|
||||
sub rcx, 1
|
||||
jnz filter_block2d_bil_fp_only_loop
|
||||
|
||||
jmp filter_block2d_bil_variance
|
||||
|
||||
filter_block2d_bil_variance:
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(7) ;[Sum]
|
||||
mov rdi, arg(8) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
xmm_bi_rd:
|
||||
times 8 dw 64
|
||||
align 16
|
||||
vp8_bilinear_filters_ssse3:
|
||||
times 8 db 128, 0
|
||||
times 8 db 112, 16
|
||||
times 8 db 96, 32
|
||||
times 8 db 80, 48
|
||||
times 8 db 64, 64
|
||||
times 8 db 48, 80
|
||||
times 8 db 32, 96
|
||||
times 8 db 16, 112
|
@@ -53,13 +53,6 @@ extern unsigned int vp8_get4x4var_mmx
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern unsigned int vp8_get4x4sse_cs_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride
|
||||
);
|
||||
extern void vp8_filter_block2d_bil4x4_var_mmx
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -92,39 +85,6 @@ extern unsigned int vp8_get16x16pred_error_mmx
|
||||
);
|
||||
|
||||
|
||||
void vp8_test_get_mb_ss(void)
|
||||
{
|
||||
short zz[] =
|
||||
{
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-2, -2, -2, -2, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, 2, 2,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-3, -3, -3, -3, 3, 3, 3, 3, -3, -3, -3, -3, 3, 3, 3, 3,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
-4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
|
||||
};
|
||||
int s = 0, x = vp8_get_mb_ss_mmx(zz);
|
||||
{
|
||||
int y;
|
||||
|
||||
for (y = 0; y < 256; y++)
|
||||
s += (zz[y] * zz[y]);
|
||||
}
|
||||
|
||||
x += 0;
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_get16x16var_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
@@ -456,146 +416,6 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
|
||||
return (xxsum - ((xsum * xsum) >> 7));
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3, avg;
|
||||
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
avg = sum0 + sum1 + sum2 + sum3;
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 8));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance8x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance16x16_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int f2soffset = (src_pixels_per_line >> 1);
|
||||
int f2doffset = (dst_pixels_per_line >> 1);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + f2soffset, src_pixels_per_line,
|
||||
dst_ptr + f2doffset, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + f2soffset + 8, src_pixels_per_line,
|
||||
dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance8x16_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int f2soffset = (src_pixels_per_line >> 1);
|
||||
int f2doffset = (dst_pixels_per_line >> 1);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + f2soffset, src_pixels_per_line,
|
||||
dst_ptr + f2doffset, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
|
@@ -81,6 +81,16 @@ void vp8_filter_block2d_bil_var_sse2
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_vert_variance8x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -91,6 +101,16 @@ void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_variance8x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -101,6 +121,16 @@ void vp8_half_horiz_variance16x_h_sse2
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_vert_variance8x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
@@ -262,21 +292,21 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
vp8_half_horiz_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
vp8_half_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
vp8_half_horiz_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
@@ -317,11 +347,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
@@ -329,11 +354,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
@@ -341,11 +361,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -356,17 +371,16 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
xoffset, yoffset,
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
@@ -406,11 +420,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
@@ -418,11 +427,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
@@ -430,11 +434,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -449,11 +448,10 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum1, &xxsum1);
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
}
|
||||
@@ -474,21 +472,21 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
vp8_half_horiz_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
vp8_half_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
vp8_half_horiz_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
@@ -506,81 +504,6 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
||||
return (xxsum - ((xsum * xsum) >> 7));
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance16x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3, avg;
|
||||
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
avg = sum0 + sum1 + sum2 + sum3;
|
||||
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 8));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance8x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance16x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
return vp8_sub_pixel_variance16x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance8x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
|
||||
return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
@@ -589,21 +512,14 @@ unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
@@ -616,21 +532,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_wmt(
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
@@ -643,21 +551,14 @@ unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
165
vp8/encoder/x86/variance_ssse3.c
Normal file
165
vp8/encoder/x86/variance_ssse3.c
Normal file
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vp8/encoder/variance.h"
|
||||
#include "vp8/common/pragmas.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern unsigned int vp8_get16x16var_sse2
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *SSE,
|
||||
int *Sum
|
||||
);
|
||||
extern void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
extern void vp8_half_horiz_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
extern void vp8_half_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
extern void vp8_filter_block2d_bil_var_ssse3
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x16_ssse3
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
// note we could avoid these if statements if the calling function
|
||||
// just called the appropriate functions inside.
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_bil_var_ssse3(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
xoffset, yoffset,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_ssse3
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
|
||||
)
|
||||
{
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_bil_var_ssse3(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
}
|
@@ -286,6 +286,8 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
|
||||
#if HAVE_SSSE3
|
||||
extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
|
||||
extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_variance_sad16x16x3
|
||||
@@ -294,6 +296,12 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
||||
#undef vp8_variance_sad16x8x3
|
||||
#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
|
||||
|
||||
#undef vp8_variance_subpixvar16x8
|
||||
#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
|
||||
|
||||
#undef vp8_variance_subpixvar16x16
|
||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@@ -16,7 +16,7 @@
|
||||
|
||||
|
||||
#if HAVE_MMX
|
||||
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
|
||||
static void short_fdct8x4_mmx(short *input, short *output, int pitch)
|
||||
{
|
||||
vp8_short_fdct4x4_mmx(input, output, pitch);
|
||||
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
|
||||
@@ -26,7 +26,7 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *scan_mask, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
|
||||
static void fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
||||
short *coeff_ptr = b->coeff;
|
||||
@@ -51,7 +51,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
|
||||
}
|
||||
|
||||
int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
|
||||
int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
|
||||
static int mbblock_error_mmx(MACROBLOCK *mb, int dc)
|
||||
{
|
||||
short *coeff_ptr = mb->block[0].coeff;
|
||||
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
|
||||
@@ -59,7 +59,7 @@ int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
|
||||
}
|
||||
|
||||
int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
|
||||
int vp8_mbuverror_mmx(MACROBLOCK *mb)
|
||||
static int mbuverror_mmx(MACROBLOCK *mb)
|
||||
{
|
||||
short *s_ptr = &mb->coeff[256];
|
||||
short *d_ptr = &mb->e_mbd.dqcoeff[256];
|
||||
@@ -69,7 +69,7 @@ int vp8_mbuverror_mmx(MACROBLOCK *mb)
|
||||
void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
static void subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
{
|
||||
unsigned char *z = *(be->base_src) + be->src;
|
||||
unsigned int src_stride = be->src_stride;
|
||||
@@ -85,7 +85,7 @@ int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
const short *inv_scan_order, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
static void fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
||||
short *coeff_ptr = b->coeff;
|
||||
@@ -115,7 +115,7 @@ int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
|
||||
short *zbin_boost_ptr,
|
||||
short *quant_shift_ptr);
|
||||
|
||||
void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
|
||||
static void regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
|
||||
{
|
||||
d->eob = vp8_regular_quantize_b_impl_sse2(b->coeff,
|
||||
b->zbin,
|
||||
@@ -131,7 +131,7 @@ void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
|
||||
}
|
||||
|
||||
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
|
||||
int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
|
||||
static int mbblock_error_xmm(MACROBLOCK *mb, int dc)
|
||||
{
|
||||
short *coeff_ptr = mb->block[0].coeff;
|
||||
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
|
||||
@@ -139,7 +139,7 @@ int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
|
||||
}
|
||||
|
||||
int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
|
||||
int vp8_mbuverror_xmm(MACROBLOCK *mb)
|
||||
static int mbuverror_xmm(MACROBLOCK *mb)
|
||||
{
|
||||
short *s_ptr = &mb->coeff[256];
|
||||
short *d_ptr = &mb->e_mbd.dqcoeff[256];
|
||||
@@ -149,7 +149,7 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
|
||||
void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
static void subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
{
|
||||
unsigned char *z = *(be->base_src) + be->src;
|
||||
unsigned int src_stride = be->src_stride;
|
||||
@@ -165,7 +165,7 @@ int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
||||
static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
d->eob = vp8_fast_quantize_b_impl_ssse3(
|
||||
b->coeff,
|
||||
@@ -176,6 +176,25 @@ void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
||||
d->dqcoeff
|
||||
);
|
||||
}
|
||||
#if CONFIG_PSNR
|
||||
#if ARCH_X86_64
|
||||
typedef void ssimpf
|
||||
(
|
||||
unsigned char *s,
|
||||
int sp,
|
||||
unsigned char *r,
|
||||
int rp,
|
||||
unsigned long *sum_s,
|
||||
unsigned long *sum_r,
|
||||
unsigned long *sum_sq_s,
|
||||
unsigned long *sum_sq_r,
|
||||
unsigned long *sum_sxr
|
||||
);
|
||||
|
||||
extern ssimpf vp8_ssim_parms_16x16_sse3;
|
||||
extern ssimpf vp8_ssim_parms_8x8_sse3;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -232,20 +251,20 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
|
||||
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
|
||||
cpi->rtcd.fdct.short8x4 = short_fdct8x4_mmx;
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
|
||||
cpi->rtcd.fdct.fast8x4 = short_fdct8x4_mmx;
|
||||
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
|
||||
|
||||
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
|
||||
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
|
||||
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
|
||||
cpi->rtcd.encodemb.subb = vp8_subtract_b_mmx;
|
||||
cpi->rtcd.encodemb.mberr = mbblock_error_mmx;
|
||||
cpi->rtcd.encodemb.mbuverr = mbuverror_mmx;
|
||||
cpi->rtcd.encodemb.subb = subtract_b_mmx;
|
||||
cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx;
|
||||
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx;
|
||||
|
||||
/*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
|
||||
/*cpi->rtcd.quantize.fastquantb = fast_quantize_b_mmx;*/
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -280,6 +299,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_sse2;
|
||||
cpi->rtcd.variance.get8x8var = vp8_get8x8var_sse2;
|
||||
cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2;
|
||||
|
||||
|
||||
/* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */;
|
||||
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
|
||||
@@ -290,16 +311,16 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ;
|
||||
|
||||
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
|
||||
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
|
||||
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
|
||||
cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2;
|
||||
cpi->rtcd.encodemb.mberr = mbblock_error_xmm;
|
||||
cpi->rtcd.encodemb.mbuverr = mbuverror_xmm;
|
||||
cpi->rtcd.encodemb.subb = subtract_b_sse2;
|
||||
cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2;
|
||||
cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2;
|
||||
|
||||
#if ARCH_X86
|
||||
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;
|
||||
cpi->rtcd.quantize.quantb = regular_quantize_b_sse2;
|
||||
#endif
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
|
||||
cpi->rtcd.quantize.fastquantb = fast_quantize_b_sse2;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
|
||||
@@ -334,11 +355,23 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
|
||||
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
|
||||
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
|
||||
|
||||
cpi->rtcd.quantize.fastquantb = fast_quantize_b_ssse3;
|
||||
|
||||
#if CONFIG_PSNR
|
||||
#if ARCH_X86_64
|
||||
cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3;
|
||||
cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
if (SSE4_1Enabled)
|
||||
{
|
||||
|
@@ -24,6 +24,7 @@ VP8_COMMON_SRCS-yes += common/entropymode.c
|
||||
VP8_COMMON_SRCS-yes += common/entropymv.c
|
||||
VP8_COMMON_SRCS-yes += common/extend.c
|
||||
VP8_COMMON_SRCS-yes += common/filter.c
|
||||
VP8_COMMON_SRCS-yes += common/filter.h
|
||||
VP8_COMMON_SRCS-yes += common/findnearmv.c
|
||||
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
|
||||
VP8_COMMON_SRCS-yes += common/idctllm.c
|
||||
@@ -68,7 +69,7 @@ VP8_COMMON_SRCS-yes += common/reconintra.c
|
||||
VP8_COMMON_SRCS-yes += common/reconintra4x4.c
|
||||
VP8_COMMON_SRCS-yes += common/setupintrarecon.c
|
||||
VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
|
||||
VP8_COMMON_SRCS-yes += common/textblit.c
|
||||
VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
|
||||
VP8_COMMON_SRCS-yes += common/treecoder.c
|
||||
|
||||
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
|
||||
|
@@ -199,7 +199,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
{
|
||||
int mb_r = (cfg->g_h + 15) / 16;
|
||||
int mb_c = (cfg->g_w + 15) / 16;
|
||||
size_t packet_sz = vp8_firstpass_stats_sz(mb_r * mb_c);
|
||||
size_t packet_sz = sizeof(FIRSTPASS_STATS);
|
||||
int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
|
||||
FIRSTPASS_STATS *stats;
|
||||
|
||||
|
@@ -168,7 +168,7 @@ static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NELEMENTS(vp8_mem_req_segs); i++)
|
||||
for (i = 0; i < NELEMENTS(ctx->mmaps); i++)
|
||||
if (ctx->mmaps[i].id == id)
|
||||
return ctx->mmaps[i].base;
|
||||
|
||||
@@ -176,25 +176,7 @@ static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
|
||||
}
|
||||
static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
|
||||
{
|
||||
/*
|
||||
ctx->pbi = mmap_lkup(ctx, VP6_SEG_PB_INSTANCE);
|
||||
ctx->pbi->mbi.block_dx_info[0].idct_output_ptr = mmap_lkup(ctx, VP6_SEG_IDCT_BUFFER);
|
||||
ctx->pbi->loop_filtered_block = mmap_lkup(ctx, VP6_SEG_LF_BLOCK);
|
||||
ctx->pbi->huff = mmap_lkup(ctx, VP6_SEG_HUFF);
|
||||
ctx->pbi->mbi.coeffs_base_ptr = mmap_lkup(ctx, VP6_SEG_COEFFS);
|
||||
ctx->pbi->fc.above_y = mmap_lkup(ctx, VP6_SEG_ABOVEY);
|
||||
ctx->pbi->fc.above_u = mmap_lkup(ctx, VP6_SEG_ABOVEU);
|
||||
ctx->pbi->fc.above_v = mmap_lkup(ctx, VP6_SEG_ABOVEV);
|
||||
ctx->pbi->prediction_mode = mmap_lkup(ctx, VP6_SEG_PRED_MODES);
|
||||
ctx->pbi->mbmotion_vector = mmap_lkup(ctx, VP6_SEG_MV_FIELD);
|
||||
ctx->pbi->fb_storage_ptr[0] = mmap_lkup(ctx, VP6_SEG_IMG0_STRG);
|
||||
ctx->pbi->fb_storage_ptr[1] = mmap_lkup(ctx, VP6_SEG_IMG1_STRG);
|
||||
ctx->pbi->fb_storage_ptr[2] = mmap_lkup(ctx, VP6_SEG_IMG2_STRG);
|
||||
#if CONFIG_POSTPROC
|
||||
ctx->pbi->postproc.deblock.fragment_variances = mmap_lkup(ctx, VP6_SEG_DEBLOCKER);
|
||||
ctx->pbi->fb_storage_ptr[3] = mmap_lkup(ctx, VP6_SEG_PP_IMG_STRG);
|
||||
#endif
|
||||
*/
|
||||
/* nothing to clean up */
|
||||
}
|
||||
|
||||
static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx)
|
||||
@@ -543,7 +525,7 @@ static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t *ctx,
|
||||
|
||||
if (!res && ctx->priv->alg_priv)
|
||||
{
|
||||
for (i = 0; i < NELEMENTS(vp8_mem_req_segs); i++)
|
||||
for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++)
|
||||
{
|
||||
if (ctx->priv->alg_priv->mmaps[i].id == mmap->id)
|
||||
if (!ctx->priv->alg_priv->mmaps[i].base)
|
||||
|
@@ -110,10 +110,13 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_ssse3.c
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_impl_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm
|
||||
|
||||
ifeq ($(CONFIG_REALTIME_ONLY),yes)
|
||||
VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
|
@@ -19,6 +19,7 @@ VP8_CX_SRCS-$(ARCH_ARM) += encoder/asm_enc_offsets.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/dct_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.c
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.h
|
||||
VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
|
||||
@@ -34,8 +35,12 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar
|
||||
|
||||
#File list for armv6
|
||||
# encoder
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_fdct4x4_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance8x8_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM)
|
||||
|
||||
#File list for neon
|
||||
|
@@ -332,7 +332,7 @@ typedef struct vpx_codec_priv_cb_pair
|
||||
* extended in one of two ways. First, a second, algorithm specific structure
|
||||
* can be allocated and the priv member pointed to it. Alternatively, this
|
||||
* structure can be made the first member of the algorithm specific structure,
|
||||
* and the pointer casted to the proper type.
|
||||
* and the pointer cast to the proper type.
|
||||
*/
|
||||
struct vpx_codec_priv
|
||||
{
|
||||
|
@@ -36,6 +36,8 @@ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx,
|
||||
res = VPX_CODEC_INCAPABLE;
|
||||
else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
|
||||
res = VPX_CODEC_INCAPABLE;
|
||||
else if (!(iface->caps & VPX_CODEC_CAP_DECODER))
|
||||
res = VPX_CODEC_INCAPABLE;
|
||||
else
|
||||
{
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
|
@@ -498,7 +498,7 @@ extern "C" {
|
||||
* Iterates over a list of the segments to allocate. The iterator storage
|
||||
* should be initialized to NULL to start the iteration. Iteration is complete
|
||||
* when this function returns VPX_CODEC_LIST_END. The amount of memory needed to
|
||||
* allocate is dependant upon the size of the encoded stream. In cases where the
|
||||
* allocate is dependent upon the size of the encoded stream. In cases where the
|
||||
* stream is not available at allocation time, a fixed size must be requested.
|
||||
* The codec will not be able to operate on streams larger than the size used at
|
||||
* allocation time.
|
||||
|
@@ -527,7 +527,7 @@ extern "C" {
|
||||
* Iterates over a list of the segments to allocate. The iterator storage
|
||||
* should be initialized to NULL to start the iteration. Iteration is complete
|
||||
* when this function returns VPX_DEC_LIST_END. The amount of memory needed to
|
||||
* allocate is dependant upon the size of the encoded stream. This means that
|
||||
* allocate is dependent upon the size of the encoded stream. This means that
|
||||
* the stream info structure must be known at allocation time. It can be
|
||||
* populated with the vpx_dec_peek_stream_info() function. In cases where the
|
||||
* stream to be decoded is not available at allocation time, a fixed size must
|
||||
|
@@ -168,15 +168,10 @@
|
||||
%macro GET_GOT 1
|
||||
push %1
|
||||
call %%get_got
|
||||
%%sub_offset:
|
||||
jmp %%exitGG
|
||||
%%get_got:
|
||||
mov %1, [esp]
|
||||
add %1, fake_got - %%sub_offset
|
||||
ret
|
||||
%%exitGG:
|
||||
pop %1
|
||||
%undef GLOBAL
|
||||
%define GLOBAL(x) x + %1 - fake_got
|
||||
%define GLOBAL(x) x + %1 - %%get_got
|
||||
%undef RESTORE_GOT
|
||||
%define RESTORE_GOT pop %1
|
||||
%endmacro
|
||||
@@ -289,7 +284,6 @@
|
||||
%elifidn __OUTPUT_FORMAT__,macho32
|
||||
%macro SECTION_RODATA 0
|
||||
section .text
|
||||
fake_got:
|
||||
%endmacro
|
||||
%else
|
||||
%define SECTION_RODATA section .rodata
|
||||
|
@@ -88,24 +88,3 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
****************************************************************************/
|
||||
int
|
||||
vp8_yv12_black_frame_buffer(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
if (ybf)
|
||||
{
|
||||
if (ybf->buffer_alloc)
|
||||
{
|
||||
duck_memset(ybf->y_buffer, 0x0, ybf->y_stride * ybf->y_height);
|
||||
duck_memset(ybf->u_buffer, 0x80, ybf->uv_stride * ybf->uv_height);
|
||||
duck_memset(ybf->v_buffer, 0x80, ybf->uv_stride * ybf->uv_height);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
@@ -145,8 +145,8 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
vp8_yv12_extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
|
||||
static void
|
||||
extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
@@ -276,5 +276,5 @@ vp8_yv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_y
|
||||
dest += dst_ybc->y_stride;
|
||||
}
|
||||
|
||||
vp8_yv12_extend_frame_borders_yonly(dst_ybc);
|
||||
extend_frame_borders_yonly(dst_ybc);
|
||||
}
|
||||
|
@@ -6,13 +6,13 @@ SCALE_SRCS-yes += vpxscale.h
|
||||
SCALE_SRCS-yes += generic/vpxscale.c
|
||||
SCALE_SRCS-yes += generic/yv12config.c
|
||||
SCALE_SRCS-yes += generic/yv12extend.c
|
||||
SCALE_SRCS-yes += generic/scalesystemdependant.c
|
||||
SCALE_SRCS-yes += generic/scalesystemdependent.c
|
||||
SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
|
||||
|
||||
#arm
|
||||
SCALE_SRCS-$(HAVE_ARMV7) += arm/scalesystemdependant.c
|
||||
SCALE_SRCS-$(HAVE_ARMV7) += arm/scalesystemdependent.c
|
||||
SCALE_SRCS-$(HAVE_ARMV7) += arm/yv12extend_arm.c
|
||||
SCALE_SRCS_REMOVE-$(HAVE_ARMV7) += generic/scalesystemdependant.c
|
||||
SCALE_SRCS_REMOVE-$(HAVE_ARMV7) += generic/scalesystemdependent.c
|
||||
|
||||
#neon
|
||||
SCALE_SRCS-$(HAVE_ARMV7) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
|
||||
|
@@ -11,9 +11,9 @@
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : system_dependant.c
|
||||
* Module Title : system_dependent.c
|
||||
*
|
||||
* Description : Miscellaneous system dependant functions
|
||||
* Description : Miscellaneous system dependent functions
|
||||
*
|
||||
****************************************************************************/
|
||||
|
@@ -63,7 +63,6 @@ extern "C"
|
||||
|
||||
int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
|
||||
int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
|
||||
int vp8_yv12_black_frame_buffer(YV12_BUFFER_CONFIG *ybf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
16
vpxenc.c
16
vpxenc.c
@@ -163,8 +163,8 @@ int stats_open_file(stats_io_t *stats, const char *fpf, int pass)
|
||||
|
||||
if (!stats->buf.buf)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate first-pass stats buffer (%d bytes)\n",
|
||||
stats->buf_alloc_sz);
|
||||
fprintf(stderr, "Failed to allocate first-pass stats buffer (%lu bytes)\n",
|
||||
(unsigned long)stats->buf_alloc_sz);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
@@ -924,8 +924,14 @@ static const arg_def_t resize_up_thresh = ARG_DEF(NULL, "resize-up", 1,
|
||||
"Upscale threshold (buf %)");
|
||||
static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1,
|
||||
"Downscale threshold (buf %)");
|
||||
static const arg_def_t end_usage = ARG_DEF(NULL, "end-usage", 1,
|
||||
"VBR=0 | CBR=1 | CQ=2");
|
||||
static const struct arg_enum_list end_usage_enum[] = {
|
||||
{"vbr", VPX_VBR},
|
||||
{"cbr", VPX_CBR},
|
||||
{"cq", VPX_CQ},
|
||||
{NULL, 0}
|
||||
};
|
||||
static const arg_def_t end_usage = ARG_DEF_ENUM(NULL, "end-usage", 1,
|
||||
"Rate control mode", end_usage_enum);
|
||||
static const arg_def_t target_bitrate = ARG_DEF(NULL, "target-bitrate", 1,
|
||||
"Bitrate (kbps)");
|
||||
static const arg_def_t min_quantizer = ARG_DEF(NULL, "min-q", 1,
|
||||
@@ -1256,7 +1262,7 @@ int main(int argc, const char **argv_)
|
||||
else if (arg_match(&arg, &resize_down_thresh, argi))
|
||||
cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
|
||||
else if (arg_match(&arg, &end_usage, argi))
|
||||
cfg.rc_end_usage = arg_parse_uint(&arg);
|
||||
cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
|
||||
else if (arg_match(&arg, &target_bitrate, argi))
|
||||
cfg.rc_target_bitrate = arg_parse_uint(&arg);
|
||||
else if (arg_match(&arg, &min_quantizer, argi))
|
||||
|
Reference in New Issue
Block a user