Compare commits
165 Commits
v0.9.5
...
sandbox/hl
Author | SHA1 | Date | |
---|---|---|---|
![]() |
5f1b295527 | ||
![]() |
1422ce5cff | ||
![]() |
2f0331c90c | ||
![]() |
67fb3a5155 | ||
![]() |
f97f2b1bb6 | ||
![]() |
edcf74c6ad | ||
![]() |
d6d5d43708 | ||
![]() |
57136a268a | ||
![]() |
cb791aaa2f | ||
![]() |
339c512762 | ||
![]() |
a1a4d23797 | ||
![]() |
3aafb47729 | ||
![]() |
8f711db4e8 | ||
![]() |
415371c9d9 | ||
![]() |
2c1b06e672 | ||
![]() |
72e22b0bb8 | ||
![]() |
c8338ebf7a | ||
![]() |
b082790c7d | ||
![]() |
eda7d538bf | ||
![]() |
55acda98f7 | ||
![]() |
96fd758ea9 | ||
![]() |
6ff2b0883a | ||
![]() |
e88d7ab245 | ||
![]() |
f50f2fd2a7 | ||
![]() |
1546e6a8c9 | ||
![]() |
48c28fc42c | ||
![]() |
3675b2291c | ||
![]() |
cf7c4732e5 | ||
![]() |
405499d835 | ||
![]() |
c28b10adeb | ||
![]() |
e0846c9c8c | ||
![]() |
ba976eaa9b | ||
![]() |
3af3593c8e | ||
![]() |
f7e2f1fedf | ||
![]() |
dd314351e6 | ||
![]() |
6dbdfe3422 | ||
![]() |
8b0cf5f79d | ||
![]() |
1942eeb886 | ||
![]() |
431dac08d1 | ||
![]() |
b095d9df3c | ||
![]() |
de4e8185e9 | ||
![]() |
a864678cdb | ||
![]() |
3fb4abf3d1 | ||
![]() |
516ea8460b | ||
![]() |
bf53ec492d | ||
![]() |
e463b95b4e | ||
![]() |
a5a8d92976 | ||
![]() |
95dbe9ccfd | ||
![]() |
0f5264b584 | ||
![]() |
74e8446e58 | ||
![]() |
8c4552fb36 | ||
![]() |
d3c7365b46 | ||
![]() |
e2de094c99 | ||
![]() |
bd9b383db2 | ||
![]() |
30830d5a7c | ||
![]() |
20b855c33e | ||
![]() |
4b6219cb33 | ||
![]() |
092b5bef37 | ||
![]() |
6cb708d501 | ||
![]() |
c49f49b113 | ||
![]() |
fc6ce744a6 | ||
![]() |
b0da9b399d | ||
![]() |
2a87491fb0 | ||
![]() |
64baa8df2e | ||
![]() |
81cdeb7117 | ||
![]() |
4fbd0227f5 | ||
![]() |
08706a3ea7 | ||
![]() |
3ac73173a4 | ||
![]() |
23aa13d92c | ||
![]() |
7fb0f86863 | ||
![]() |
64f3d91579 | ||
![]() |
825adc464f | ||
![]() |
41f4458a03 | ||
![]() |
3809d7bbd9 | ||
![]() |
398aa81849 | ||
![]() |
b1aa54ab26 | ||
![]() |
b7b1e6fb55 | ||
![]() |
136bd2455e | ||
![]() |
97a86c5b13 | ||
![]() |
0ced701487 | ||
![]() |
e0cf330cde | ||
![]() |
cb9698951c | ||
![]() |
c63fc881e1 | ||
![]() |
160f3c7e9e | ||
![]() |
d88da98614 | ||
![]() |
718c19711a | ||
![]() |
f661fa1f24 | ||
![]() |
062980cc48 | ||
![]() |
7c03a1c308 | ||
![]() |
9520f4b3cc | ||
![]() |
2fa5d5a26d | ||
![]() |
d283d9bb30 | ||
![]() |
8534071de0 | ||
![]() |
ccb0348473 | ||
![]() |
cec6a596b5 | ||
![]() |
c3bbb29164 | ||
![]() |
5e76dfcc70 | ||
![]() |
9c8ad79fdc | ||
![]() |
3430820bbe | ||
![]() |
fd9f9dc054 | ||
![]() |
19e32ac7c7 | ||
![]() |
78cbe51bc3 | ||
![]() |
19255b8fe0 | ||
![]() |
ad6150f769 | ||
![]() |
1753f0d208 | ||
![]() |
70b885a0e8 | ||
![]() |
ed5ab7fa49 | ||
![]() |
9a6740af80 | ||
![]() |
f7670acc68 | ||
![]() |
f874391e02 | ||
![]() |
7ee516d2b3 | ||
![]() |
8d94796cad | ||
![]() |
79e2b1f39b | ||
![]() |
99d02c0f9f | ||
![]() |
69ee697fef | ||
![]() |
4fedfa75f8 | ||
![]() |
faaa57b945 | ||
![]() |
d49da085c0 | ||
![]() |
e180255375 | ||
![]() |
f4709d2895 | ||
![]() |
373f5c3144 | ||
![]() |
73189f21b3 | ||
![]() |
8c2dfde3ed | ||
![]() |
ef2f27f10e | ||
![]() |
0a49747b01 | ||
![]() |
58083cb34d | ||
![]() |
213f7b0907 | ||
![]() |
692b10858d | ||
![]() |
9b1ece2cca | ||
![]() |
5f0e0617ba | ||
![]() |
647df00f30 | ||
![]() |
513f8e6814 | ||
![]() |
6adbe09058 | ||
![]() |
458f4fedd2 | ||
![]() |
4d1b0d2a2d | ||
![]() |
9fb80f7170 | ||
![]() |
f7e187d362 | ||
![]() |
5551ef0ef4 | ||
![]() |
bd05d9e480 | ||
![]() |
507eb4b577 | ||
![]() |
0e7b60617f | ||
![]() |
a5397dbaf1 | ||
![]() |
77e6b4504b | ||
![]() |
4b9dc57260 | ||
![]() |
0a29bd9793 | ||
![]() |
b8f43aec66 | ||
![]() |
c377bf0eec | ||
![]() |
90c505f218 | ||
![]() |
9f61a83bf9 | ||
![]() |
ff4a71f4c2 | ||
![]() |
20745f8442 | ||
![]() |
0684c647ef | ||
![]() |
dcee88ea37 | ||
![]() |
6614563b8f | ||
![]() |
f57fc7bcc6 | ||
![]() |
9d93dabee0 | ||
![]() |
7e3a1e7361 | ||
![]() |
c4d7e5e67e | ||
![]() |
2b4913eb0d | ||
![]() |
a097e18964 | ||
![]() |
f26fe7d93b | ||
![]() |
3d84da6b8d | ||
![]() |
71ecb5d7d9 | ||
![]() |
15acc84f10 | ||
![]() |
8d0f7a01e6 |
45
args.c
45
args.c
@@ -135,6 +135,17 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
|
||||
def->long_name, long_val);
|
||||
|
||||
fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
|
||||
|
||||
if(def->enums)
|
||||
{
|
||||
const struct arg_enum_list *listptr;
|
||||
|
||||
fprintf(fp, " %-37s\t ", "");
|
||||
|
||||
for(listptr = def->enums; listptr->name; listptr++)
|
||||
fprintf(fp, "%s%s", listptr->name,
|
||||
listptr[1].name ? ", " : "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,3 +229,37 @@ struct vpx_rational arg_parse_rational(const struct arg *arg)
|
||||
|
||||
return rat;
|
||||
}
|
||||
|
||||
|
||||
int arg_parse_enum(const struct arg *arg)
|
||||
{
|
||||
const struct arg_enum_list *listptr;
|
||||
long int rawval;
|
||||
char *endptr;
|
||||
|
||||
/* First see if the value can be parsed as a raw value */
|
||||
rawval = strtol(arg->val, &endptr, 10);
|
||||
if (arg->val[0] != '\0' && endptr[0] == '\0')
|
||||
{
|
||||
/* Got a raw value, make sure it's valid */
|
||||
for(listptr = arg->def->enums; listptr->name; listptr++)
|
||||
if(listptr->val == rawval)
|
||||
return rawval;
|
||||
}
|
||||
|
||||
/* Next see if it can be parsed as a string */
|
||||
for(listptr = arg->def->enums; listptr->name; listptr++)
|
||||
if(!strcmp(arg->val, listptr->name))
|
||||
return listptr->val;
|
||||
|
||||
die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int arg_parse_enum_or_int(const struct arg *arg)
|
||||
{
|
||||
if(arg->def->enums)
|
||||
return arg_parse_enum(arg);
|
||||
return arg_parse_int(arg);
|
||||
}
|
||||
|
12
args.h
12
args.h
@@ -22,14 +22,23 @@ struct arg
|
||||
const struct arg_def *def;
|
||||
};
|
||||
|
||||
/* One entry of an option's table of accepted enumerated values.
 * Tables are walked until an entry whose name is NULL is reached. */
struct arg_enum_list
|
||||
{
|
||||
const char *name; /* text accepted on the command line and listed in the usage output */
|
||||
int val; /* integer returned when this entry is selected */
|
||||
};
|
||||
/* Terminating entry for an arg_enum_list table (NULL name, zero value). */
#define ARG_ENUM_LIST_END {0}
|
||||
|
||||
/* Static description of one command line option. */
typedef struct arg_def
|
||||
{
|
||||
const char *short_name; /* presumably the single-dash spelling -- confirm against arg_match() */
|
||||
const char *long_name; /* long spelling, printed in the usage text */
|
||||
int has_val; /* presumably non-zero when the option takes a value -- confirm */
|
||||
const char *desc; /* help text printed next to the option in the usage output */
|
||||
const struct arg_enum_list *enums; /* accepted enumerated values, or NULL for a plain option */
|
||||
} arg_def_t;
|
||||
#define ARG_DEF(s,l,v,d) {s,l,v,d}
|
||||
#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL}
|
||||
#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e}
|
||||
#define ARG_DEF_LIST_END {0}
|
||||
|
||||
struct arg arg_init(char **argv);
|
||||
@@ -41,4 +50,5 @@ char **argv_dup(int argc, const char **argv);
|
||||
unsigned int arg_parse_uint(const struct arg *arg);
|
||||
int arg_parse_int(const struct arg *arg);
|
||||
struct vpx_rational arg_parse_rational(const struct arg *arg);
|
||||
int arg_parse_enum_or_int(const struct arg *arg);
|
||||
#endif
|
||||
|
@@ -547,6 +547,10 @@ process_common_toolchain() {
|
||||
tgt_isa=universal
|
||||
tgt_os=darwin9
|
||||
;;
|
||||
*darwin10*)
|
||||
tgt_isa=x86_64
|
||||
tgt_os=darwin10
|
||||
;;
|
||||
*mingw32*|*cygwin*)
|
||||
[ -z "$tgt_isa" ] && tgt_isa=x86
|
||||
tgt_os=win32
|
||||
@@ -606,6 +610,12 @@ process_common_toolchain() {
|
||||
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk"
|
||||
add_ldflags "-mmacosx-version-min=10.5"
|
||||
;;
|
||||
*-darwin10-*)
|
||||
add_cflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
|
||||
add_cflags "-mmacosx-version-min=10.6"
|
||||
add_ldflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk"
|
||||
add_ldflags "-mmacosx-version-min=10.6"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Handle Solaris variants. Solaris 10 needs -lposix4
|
||||
@@ -824,6 +834,7 @@ process_common_toolchain() {
|
||||
soft_enable sse2
|
||||
soft_enable sse3
|
||||
soft_enable ssse3
|
||||
soft_enable sse4_1
|
||||
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
@@ -879,7 +890,7 @@ process_common_toolchain() {
|
||||
case ${tgt_os} in
|
||||
win*)
|
||||
add_asflags -f win${bits}
|
||||
enabled debug && add_asflags -g dwarf2
|
||||
enabled debug && add_asflags -g cv8
|
||||
;;
|
||||
linux*|solaris*)
|
||||
add_asflags -f elf${bits}
|
||||
|
11
configure
vendored
11
configure
vendored
@@ -41,6 +41,7 @@ Advanced options:
|
||||
${toggle_shared} shared library support
|
||||
${toggle_small} favor smaller size over speed
|
||||
${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
|
||||
${toggle_postproc_visualizer} macro block / block level visualizers
|
||||
|
||||
Codecs:
|
||||
Codecs can be selectively enabled or disabled individually, or by family:
|
||||
@@ -114,6 +115,7 @@ all_platforms="${all_platforms} x86-win32-vs7"
|
||||
all_platforms="${all_platforms} x86-win32-vs8"
|
||||
all_platforms="${all_platforms} x86-win32-vs9"
|
||||
all_platforms="${all_platforms} x86_64-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-icc"
|
||||
all_platforms="${all_platforms} x86_64-solaris-gcc"
|
||||
@@ -199,6 +201,7 @@ ARCH_EXT_LIST="
|
||||
sse2
|
||||
sse3
|
||||
ssse3
|
||||
sse4_1
|
||||
|
||||
altivec
|
||||
"
|
||||
@@ -249,6 +252,7 @@ CONFIG_LIST="
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
"
|
||||
CMDLINE_SELECT="
|
||||
extra_warnings
|
||||
@@ -288,6 +292,7 @@ CMDLINE_SELECT="
|
||||
shared
|
||||
small
|
||||
arm_asm_detok
|
||||
postproc_visualizer
|
||||
"
|
||||
|
||||
process_cmdline() {
|
||||
@@ -324,8 +329,6 @@ post_process_cmdline() {
|
||||
for c in ${CODECS}; do
|
||||
enabled ${c} && enable ${c##*_}s
|
||||
done
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -535,6 +538,10 @@ process_toolchain() {
|
||||
|
||||
# Other toolchain specific defaults
|
||||
case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
|
||||
|
||||
if enabled postproc_visualizer; then
|
||||
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
|
@@ -17,6 +17,7 @@ vpxdec.SRCS += md5_utils.c md5_utils.h
|
||||
vpxdec.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxdec.SRCS += vpx/vpx_integer.h
|
||||
vpxdec.SRCS += args.c args.h vpx_ports/config.h
|
||||
vpxdec.SRCS += tools_common.c tools_common.h
|
||||
vpxdec.SRCS += nestegg/halloc/halloc.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/align.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/halloc.c
|
||||
@@ -28,6 +29,7 @@ vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950
|
||||
vpxdec.DESCRIPTION = Full featured decoder
|
||||
UTILS-$(CONFIG_ENCODERS) += vpxenc.c
|
||||
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h
|
||||
vpxenc.SRCS += tools_common.c tools_common.h
|
||||
vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
vpxenc.SRCS += libmkv/EbmlIDs.h
|
||||
@@ -75,6 +77,11 @@ GEN_EXAMPLES-$(CONFIG_ENCODERS) += decode_with_drops.c
|
||||
endif
|
||||
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
|
||||
decode_with_drops.DESCRIPTION = Drops frames while decoding
|
||||
ifeq ($(CONFIG_DECODERS),yes)
|
||||
GEN_EXAMPLES-$(CONFIG_ENCODERS) += decode_packetdrop_eval.c
|
||||
endif
|
||||
decode_partial_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D28
|
||||
decode_partial_with_drops.DESCRIPTION = Drops parts of frames while decoding and evaluate quality
|
||||
GEN_EXAMPLES-$(CONFIG_ENCODERS) += error_resilient.c
|
||||
error_resilient.GUID = DF5837B9-4145-4F92-A031-44E4F832E00C
|
||||
error_resilient.DESCRIPTION = Error Resiliency Feature
|
||||
|
415
examples/decode_packetdrop_eval.txt
Normal file
415
examples/decode_packetdrop_eval.txt
Normal file
@@ -0,0 +1,415 @@
|
||||
@TEMPLATE decoder_tmpl.c
|
||||
Decode With Drops Example
|
||||
=========================
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION
|
||||
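This is an example utility which drops a series of frames, or a random
|
||||
share of the packets within each frame, as specified on the command line,
|
||||
and reports the SSIM of the decoded output against a reference file. This is useful for observing the error recovery features of the codec.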
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_INCLUDES
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "vpx_scale/yv12config.h"
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_INCLUDES
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HELPERS
|
||||
extern double vp8_calc_ssim
|
||||
(
|
||||
YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
int lumamask,
|
||||
double *weight
|
||||
);
|
||||
|
||||
extern double vp8_ssim
|
||||
(
|
||||
const unsigned char *img1,
|
||||
const unsigned char *img2,
|
||||
int stride_img1,
|
||||
int stride_img2,
|
||||
int width,
|
||||
int height
|
||||
);
|
||||
|
||||
/* Four-byte buffer with read/position bookkeeping.
 * NOTE(review): none of the helpers in this example reference it; it looks
 * like a carry-over from another tool -- confirm whether it is still needed. */
struct detect_buffer {
|
||||
char buf[4];
|
||||
size_t buf_read;
|
||||
size_t position;
|
||||
};
|
||||
|
||||
/* Fields decoded from the first three bytes of a compressed frame. */
struct parsed_header
{
    char key_frame;        /* non-zero for key frames */
    int version;
    char show_frame;
    int first_part_size;   /* size in bytes of the first partition */
};

/* Return the size in bytes of the next packet to cut from a frame.
 *
 *   hdr     parsed frame header
 *   pos     number of bytes of the frame already packetized
 *   length  total frame size in bytes
 *   mtu     maximum packet size in bytes
 *
 * The uncompressed header plus the first partition is packetized first and
 * a packet never straddles the end of that region; the rest of the frame is
 * then cut into MTU-sized pieces.  The result never exceeds the number of
 * bytes left in the frame, so a first_part_size larger than the frame (e.g.
 * from a damaged stream) cannot make the caller read past its buffer.
 * Returns 0 once the whole frame has been consumed.
 */
int next_packet(struct parsed_header* hdr, int pos, int length, int mtu)
{
    int size;
    int remaining = length - pos;
    /* Uncompressed part is 3 bytes for P frames and 10 bytes for I frames */
    int uncomp_part_size = (hdr->key_frame ? 10 : 3);
    /* number of bytes yet to send from header and the first partition */
    int remainFirst = uncomp_part_size + hdr->first_part_size - pos;

    if (remaining <= 0)
    {
        return 0;
    }

    /* Inside the first partition stop at its end; afterwards just slot
     * the data up according to the MTU. */
    size = (remainFirst > 0 && remainFirst < mtu) ? remainFirst : mtu;

    /* Never hand out more than what is actually left in the frame. */
    if (size > remaining)
    {
        size = remaining;
    }

    return size;
}
|
||||
|
||||
void throw_packets(unsigned char* frame, int* size, int loss_rate, int* thrown, int* kept)
|
||||
{
|
||||
unsigned char loss_frame[256*1024];
|
||||
int pkg_size = 1;
|
||||
int count = 0;
|
||||
int pos = 0;
|
||||
int loss_pos = 0;
|
||||
struct parsed_header hdr;
|
||||
unsigned int tmp;
|
||||
int mtu = 100;
|
||||
|
||||
if (*size < 3)
|
||||
{
|
||||
return;
|
||||
}
|
||||
putc('|', stdout);
|
||||
/* parse uncompressed 3 bytes */
|
||||
tmp = (frame[2] << 16) | (frame[1] << 8) | frame[0];
|
||||
hdr.key_frame = !(tmp & 0x1); /* inverse logic */
|
||||
hdr.version = (tmp >> 1) & 0x7;
|
||||
hdr.show_frame = (tmp >> 4) & 0x1;
|
||||
hdr.first_part_size = (tmp >> 5) & 0x7FFFF;
|
||||
|
||||
/* don't drop key frames */
|
||||
if (hdr.key_frame)
|
||||
{
|
||||
int i;
|
||||
*kept = *size/mtu + ((*size % mtu > 0) ? 1 : 0); /* approximate */
|
||||
for (i=0; i < *kept; i++)
|
||||
putc('.', stdout);
|
||||
return;
|
||||
}
|
||||
|
||||
while ((pkg_size = next_packet(&hdr, pos, *size, mtu)) > 0)
|
||||
{
|
||||
int loss_event = ((rand() + 1.0)/(RAND_MAX + 1.0) < loss_rate/100.0);
|
||||
if (*thrown == 0 && !loss_event)
|
||||
{
|
||||
memcpy(loss_frame + loss_pos, frame + pos, pkg_size);
|
||||
loss_pos += pkg_size;
|
||||
(*kept)++;
|
||||
putc('.', stdout);
|
||||
}
|
||||
else
|
||||
{
|
||||
(*thrown)++;
|
||||
putc('X', stdout);
|
||||
}
|
||||
pos += pkg_size;
|
||||
}
|
||||
memcpy(frame, loss_frame, loss_pos);
|
||||
memset(frame + loss_pos, 0, *size - loss_pos);
|
||||
*size = loss_pos;
|
||||
}
|
||||
|
||||
double ssim_yuv(unsigned char *ptr_ref,
|
||||
unsigned char *ptr_deg,
|
||||
int w,
|
||||
int h,
|
||||
double *weight)
|
||||
{
|
||||
/* insert ref and deg into YV12_BUFFER_CONFIG structs
|
||||
* and calculate SSIM for this frame
|
||||
*/
|
||||
|
||||
YV12_BUFFER_CONFIG ref, deg;
|
||||
|
||||
ref.y_width = w;
|
||||
ref.y_height = h;
|
||||
ref.y_stride = w;
|
||||
ref.uv_width = w/2;
|
||||
ref.uv_height = h/2;
|
||||
ref.uv_stride = w/2;
|
||||
ref.y_buffer = ptr_ref;
|
||||
ref.u_buffer = ptr_ref + w*h;
|
||||
ref.v_buffer = ptr_ref + w*h + (w*h)/4;
|
||||
/* do not need the rest of the struct parameters; leave them as is */
|
||||
|
||||
deg.y_width = w;
|
||||
deg.y_height = h;
|
||||
deg.y_stride = w;
|
||||
deg.uv_width = w/2;
|
||||
deg.uv_height = h/2;
|
||||
deg.uv_stride = w/2;
|
||||
deg.y_buffer = ptr_deg;
|
||||
deg.u_buffer = ptr_deg + w*h;
|
||||
deg.v_buffer = ptr_deg + w*h + (w*h)/4;
|
||||
|
||||
return vp8_calc_ssim(&ref, °, 1, weight);
|
||||
}
|
||||
|
||||
/* Append one decoded frame to the output file and score it against the
 * next frame of the reference file.
 *
 *   img_buf     packed frame of (w*h*3)/2 bytes
 *   w, h        luma width and height in pixels
 *   outfile     file the frame is written to
 *   reffile     file the matching reference frame is read from
 *   sum_ssim    running sum of weight * SSIM, updated in place
 *   sum_weight  running sum of the weights, updated in place
 *
 * Any failure (NULL argument, short write, allocation failure, short read)
 * is reported through die().
 */
void write_and_eval_frame(unsigned char *img_buf,
                          int w,
                          int h,
                          FILE *outfile,
                          FILE *reffile,
                          double *sum_ssim,
                          double *sum_weight)
{
    int            img_sz = (w*h*3)/2;
    unsigned char *ref_buf = NULL;
    double         frame_weight = 0.0;
    double         frame_ssim;

    if(!(img_buf && outfile && reffile))
        die("Failure in write_and_eval_frame");

    if(fwrite(img_buf, 1, img_sz, outfile) != img_sz)
        die("Could not write to file");

    /* Fetch the matching frame from the reference file */
    ref_buf = (unsigned char *) malloc(img_sz);
    if(ref_buf == NULL)
        die("Error allocating memory");

    if(fread(ref_buf, 1, img_sz, reffile) != img_sz)
        die("Failed to read complete reference frame");

    /* Accumulate the weighted SSIM of this frame */
    frame_ssim = ssim_yuv(ref_buf, img_buf, w, h, &frame_weight);
    *sum_ssim += frame_ssim * frame_weight;
    *sum_weight += frame_weight;

    free(ref_buf);
}
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HELPERS
|
||||
|
||||
Usage
|
||||
-----
|
||||
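This example adds two arguments to the `simple_decoder` example: a
|
||||
reference file of raw frames used for the SSIM comparison, and a drop
|
||||
specification (an `N-M` range, an `N/M` pattern, or an `L,S` packet loss rate in percent and random seed), which is parsed as follows: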
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USAGE
|
||||
if(argc!=5 && argc != 6)
|
||||
die("Usage: %s <infile> <outfile> <reffile> <N-M|N/M|L,S>\n",
|
||||
argv[0]);
|
||||
{
|
||||
char *nptr;
|
||||
n = strtol(argv[4], &nptr, 0);
|
||||
mode = (*nptr == '\0' || *nptr == ',') ? 2 : (*nptr == '-') ? 1 : 0;
|
||||
|
||||
m = strtol(nptr+1, NULL, 0);
|
||||
if((!n && !m) || (*nptr != '-' && *nptr != '/' &&
|
||||
*nptr != '\0' && *nptr != ','))
|
||||
die("Couldn't parse pattern %s\n", argv[4]);
|
||||
}
|
||||
seed = (m > 0) ? m : (unsigned int)time(NULL);
|
||||
srand(seed);thrown_frame = 0;
|
||||
printf("Seed: %u\n", seed);
|
||||
|
||||
if(!(reffile = fopen(argv[3], "rb")))
|
||||
die("Failed to open %s for reading", argv[3]);
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USAGE
|
||||
|
||||
|
||||
Dropping A Range Of Frames
|
||||
--------------------------
|
||||
To drop a range of frames, specify the starting frame and the ending
|
||||
frame to drop, separated by a dash. The following command will drop
|
||||
frames 5 through 10 (base 1).
|
||||
|
||||
$ ./decode_packetdrop_eval in.ivf out.i420 ref.i420 5-10
|
||||
|
||||
|
||||
Dropping A Pattern Of Frames
|
||||
----------------------------
|
||||
To drop a pattern of frames, specify the number of frames to drop and
|
||||
the number of frames after which to repeat the pattern, separated by
|
||||
a forward-slash. The following command will drop 3 of 7 frames.
|
||||
Specifically, it will decode 4 frames, then drop 3 frames, and then
|
||||
repeat.
|
||||
|
||||
$ ./decode_packetdrop_eval in.ivf out.i420 ref.i420 3/7
|
||||
|
||||
|
||||
Extra Variables
|
||||
---------------
|
||||
This example maintains the pattern passed on the command line in the
|
||||
`n`, `m`, and `mode` variables:
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_VARS
|
||||
/* Drop specification from the command line */
int            n;                    /* first number of the specification */
int            m;                    /* second number of the specification */
int            mode;                 /* 0: N/M pattern, 1: N-M range, 2: packet loss */
unsigned int   seed;                 /* value passed to srand() */

/* Totals over the whole run, and counts for the current frame */
int            thrown=0;
int            kept=0;
int            thrown_frame=0;
int            kept_frame=0;

/* Copy of the most recently decoded frame */
unsigned char *last_yuv_buf = NULL;
int            last_sz = 0;          /* bytes copied into last_yuv_buf */
int            last_alloc_sz = 0;    /* bytes allocated for last_yuv_buf */
unsigned char *temp_last = NULL;     /* write cursor into last_yuv_buf */
int            expected_decode = 1;  /* frame number expected next from the decoder */

FILE          *reffile;              /* reference frames */
FILE          *sttfile;              /* loss.stt result log */

/* Not referenced by the other sections of this example */
unsigned char *ref_yuv_buf = NULL;
int            ref_sz = 0;
int            ref_alloc_sz = 0;

/* SSIM accumulation */
double         sum_ssim = 0.0;
double         sum_weight = 0.0;
double         total_ssim;
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRA_VARS
|
||||
|
||||
|
||||
Making The Drop Decision
|
||||
------------------------
|
||||
The example decides whether to drop the frame based on the current
|
||||
frame number, immediately before decoding the frame.
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE
|
||||
/* Decide whether to throw parts of the frame or the whole frame
|
||||
depending on the drop mode */
|
||||
thrown_frame = 0;
|
||||
kept_frame = 0;
|
||||
switch (mode)
|
||||
{
|
||||
case 0:
|
||||
if (m - (frame_cnt-1)%m <= n)
|
||||
{
|
||||
frame_sz = 0;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (frame_cnt >= n && frame_cnt <= m)
|
||||
{
|
||||
frame_sz = 0;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
throw_packets(frame, &frame_sz, n, &thrown_frame, &kept_frame);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
if (mode < 2)
|
||||
{
|
||||
if (frame_sz == 0)
|
||||
{
|
||||
putc('X', stdout);
|
||||
thrown_frame++;
|
||||
}
|
||||
else
|
||||
{
|
||||
putc('.', stdout);
|
||||
kept_frame++;
|
||||
}
|
||||
}
|
||||
thrown += thrown_frame;
|
||||
kept += kept_frame;
|
||||
fflush(stdout);
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRE_DECODE
|
||||
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DECODE
|
||||
/* Decode the frame. The user-private pointer is passed as NULL (the
   frame_cnt cookie is commented out). */
|
||||
if(vpx_codec_decode(&codec, frame, frame_sz, NULL /*(void *) frame_cnt*/, 0))
|
||||
die_codec(&codec, "Failed to decode frame");
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DECODE
|
||||
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_FRAME
|
||||
while((img = vpx_codec_get_frame(&codec, &iter))) {
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_FRAME
|
||||
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_DX
|
||||
/* Check if we are missing frames */
|
||||
while(/*((int) img->user_priv)*/ frame_cnt > expected_decode) {
|
||||
write_and_eval_frame(last_yuv_buf,
|
||||
img->d_w,
|
||||
img->d_h,
|
||||
outfile,
|
||||
reffile,
|
||||
&sum_ssim,
|
||||
&sum_weight);
|
||||
|
||||
expected_decode++;
|
||||
}
|
||||
|
||||
expected_decode++;
|
||||
|
||||
/* Check size of last_yuv_buf */
|
||||
if(last_alloc_sz < (img->d_w * img->d_h * 3) / 2) {
|
||||
/* Re-allocate */
|
||||
if(last_yuv_buf) {
|
||||
free(last_yuv_buf);
|
||||
last_yuv_buf = NULL;
|
||||
}
|
||||
last_alloc_sz = (img->d_w * img->d_h * 3) / 2;
|
||||
last_yuv_buf = (unsigned char *) malloc(last_alloc_sz);
|
||||
last_sz = 0;
|
||||
}
|
||||
|
||||
/* First, write new frame to last_yuv_buf */
|
||||
temp_last = last_yuv_buf;
|
||||
last_sz = 0;
|
||||
for(plane=0; plane < 3; plane++) {
|
||||
unsigned char *buf =img->planes[plane];
|
||||
|
||||
for(y=0; y<img->d_h >> (plane?1:0); y++) {
|
||||
memcpy(temp_last, buf, img->d_w >> (plane?1:0));
|
||||
temp_last += img->d_w >> (plane?1:0);
|
||||
last_sz += img->d_w >> (plane?1:0);
|
||||
buf += img->stride[plane];
|
||||
}
|
||||
}
|
||||
|
||||
/* Then, write it to file and calculate SSIM*/
|
||||
write_and_eval_frame(last_yuv_buf,
|
||||
img->d_w,
|
||||
img->d_h,
|
||||
outfile,
|
||||
reffile,
|
||||
&sum_ssim,
|
||||
&sum_weight);
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_DX
|
||||
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
|
||||
|
||||
total_ssim = sum_ssim / sum_weight;
|
||||
printf("SSIM = %f\n", total_ssim);
|
||||
|
||||
if(!(sttfile = fopen("loss.stt", "at")))
|
||||
die("Failed to open loss.stt for writing");
|
||||
fprintf(sttfile, "lossparam \tSSIM\n");
|
||||
fprintf(sttfile, "%s\t%f\n", argv[4], total_ssim);
|
||||
fclose(sttfile);
|
||||
|
||||
if(last_yuv_buf)
|
||||
free(last_yuv_buf);
|
||||
|
||||
if(vpx_codec_destroy(&codec))
|
||||
die_codec(&codec, "Failed to destroy codec");
|
||||
|
||||
fclose(reffile);
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
|
@@ -42,6 +42,8 @@ static void die(const char *fmt, ...) {
|
||||
|
||||
@DIE_CODEC
|
||||
|
||||
@HELPERS
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
FILE *infile, *outfile;
|
||||
vpx_codec_ctx_t codec;
|
||||
|
@@ -78,8 +78,8 @@ if(frame_cnt + 1 == 22) {
|
||||
} else if(frame_cnt + 1 == 44) {
|
||||
vpx_active_map_t active;
|
||||
|
||||
active.rows = 240/16;
|
||||
active.cols = 320/16;
|
||||
active.rows = cfg.g_h/16;
|
||||
active.cols = cfg.g_w/16;
|
||||
|
||||
/* pass in null map to disable active_map*/
|
||||
active.active_map = NULL;
|
||||
|
24
tools_common.c
Normal file
24
tools_common.c
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include "tools_common.h"
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
/* Put a stdio stream into binary mode where the platform distinguishes
 * text and binary streams (_WIN32 builds); a no-op elsewhere.
 * Returns the stream that was passed in. */
FILE* set_binary_mode(FILE *stream)
{
#ifdef _WIN32
    _setmode(_fileno(stream), _O_BINARY);
#else
    (void)stream;
#endif
    return stream;
}
|
16
tools_common.h
Normal file
16
tools_common.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TOOLS_COMMON_H
#define TOOLS_COMMON_H

/* FILE is used in the prototype below; include its definition here so the
 * header can be included on its own. */
#include <stdio.h>

/* Sets a stdio stream into binary mode.  Returns the stream passed in. */
FILE* set_binary_mode(FILE *stream);

#endif
|
@@ -44,9 +44,11 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
|
||||
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
vpx_free(oci->prev_mip);
|
||||
|
||||
oci->above_context = 0;
|
||||
oci->mip = 0;
|
||||
oci->prev_mip = 0;
|
||||
|
||||
}
|
||||
|
||||
@@ -111,6 +113,17 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
|
||||
/* allocate memory for last frame MODE_INFO array */
|
||||
oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
|
||||
|
||||
if (!oci->prev_mip)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
|
||||
|
||||
|
||||
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
|
@@ -282,6 +282,8 @@ typedef struct
|
||||
|
||||
void *current_bc;
|
||||
|
||||
int corrupted;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
struct VP8_COMMON_RTCD *rtcd;
|
||||
#endif
|
||||
|
@@ -36,6 +36,14 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
/* One entry per coefficient of a 4x4 block, values 1..16.
 * NOTE(review): this appears to be the 1-based inverse of
 * vp8_default_zig_zag1d (entry i = scan position of raster index i, plus
 * one); only the last row of that table is visible here -- confirm. */
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
@@ -106,23 +114,20 @@ static void init_bit_trees()
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
|
||||
|
||||
static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11];
|
||||
|
||||
vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, bcc1, 1, 5},
|
||||
{ cat2, Pcat2, bcc2, 2, 7},
|
||||
{ cat3, Pcat3, bcc3, 3, 11},
|
||||
{ cat4, Pcat4, bcc4, 4, 19},
|
||||
{ cat5, Pcat5, bcc5, 5, 35},
|
||||
{ cat6, Pcat6, bcc6, 11, 67},
|
||||
{ 0, 0, 0, 0, 0}
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
};
|
||||
#include "defaultcoefcounts.h"
|
||||
|
||||
|
@@ -24,10 +24,10 @@
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define vp8_coef_tokens 12
|
||||
@@ -42,7 +42,6 @@ typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
vp8bc_index_t *prob_bc;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
@@ -95,6 +94,7 @@ struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
|
||||
extern short vp8_default_zig_zag_mask[16];
|
||||
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
|
||||
|
||||
|
@@ -65,11 +65,13 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb = vp8_blend_mb_c;
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
|
||||
rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
|
||||
rtcd->postproc.blend_b = vp8_blend_b_c;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -18,6 +18,7 @@ extern "C"
|
||||
#endif
|
||||
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "type_aliases.h"
|
||||
#include "ppflags.h"
|
||||
@@ -45,7 +46,8 @@ extern "C"
|
||||
typedef enum
|
||||
{
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
@@ -149,6 +151,7 @@ extern "C"
|
||||
int fixed_q;
|
||||
int worst_allowed_q;
|
||||
int best_allowed_q;
|
||||
int cq_level;
|
||||
|
||||
// allow internal resizing ( currently disabled in the build !!!!!)
|
||||
int allow_spatial_resampling;
|
||||
@@ -186,9 +189,10 @@ extern "C"
|
||||
int arnr_strength ;
|
||||
int arnr_type ;
|
||||
|
||||
|
||||
struct vpx_fixed_buf two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
|
||||
vp8e_tuning tuning;
|
||||
} VP8_CONFIG;
|
||||
|
||||
|
||||
@@ -204,7 +208,7 @@ extern "C"
|
||||
// and not just a copy of the pointer..
|
||||
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
|
||||
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
|
@@ -105,7 +105,7 @@ typedef struct VP8Common
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
@@ -140,6 +140,8 @@ typedef struct VP8Common
|
||||
|
||||
MODE_INFO *mip; /* Base of allocated array */
|
||||
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
|
||||
|
||||
INTERPOLATIONFILTERTYPE mcomp_filter_type;
|
||||
|
@@ -51,7 +51,7 @@ extern "C"
|
||||
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
|
@@ -26,7 +26,7 @@
|
||||
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
|
||||
|
||||
/* global constants */
|
||||
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
|
||||
@@ -41,13 +41,32 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
||||
{ RGB_TO_YUV(0xFF0000) } /* Red */
|
||||
};
|
||||
|
||||
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
|
||||
/* Overlay colours for sub-block prediction modes.
 * Indexed by a sub-block's mode (bmi->mode in vp8_post_proc_frame); each
 * entry is the {Y, U, V} triple produced by RGB_TO_YUV from a 0xRRGGBB value.
 * NOTE(review): the colour names below are informal; 0xed00f5 reads as
 * magenta rather than dark blue under 0xRRGGBB - confirm the label.
 */
static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
|
||||
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
|
||||
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
|
||||
{ RGB_TO_YUV(0xff3366) }, /* Coral */
|
||||
{ RGB_TO_YUV(0x3366ff) }, /* Blue */
|
||||
{ RGB_TO_YUV(0xed00f5) }, /* Dark Blue */
|
||||
{ RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
|
||||
{ RGB_TO_YUV(0xff6633) }, /* Orange */
|
||||
{ RGB_TO_YUV(0x33ccff) }, /* Light Blue */
|
||||
{ RGB_TO_YUV(0x8ab800) }, /* Green */
|
||||
{ RGB_TO_YUV(0xffcc33) }, /* Light Orange */
|
||||
{ RGB_TO_YUV(0x33ffcc) }, /* Aqua */
|
||||
{ RGB_TO_YUV(0x66ff33) }, /* Light Green */
|
||||
{ RGB_TO_YUV(0xccff33) }, /* Yellow */
|
||||
};
|
||||
|
||||
/* Overlay colours for the reference frame used by each macroblock.
 * Indexed by mi->mbmi.ref_frame in vp8_post_proc_frame; each entry is the
 * {Y, U, V} triple produced by RGB_TO_YUV from a 0xRRGGBB value.
 */
static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x00ff00) }, /* Green */
|
||||
{ RGB_TO_YUV(0x0000ff) }, /* Blue */
|
||||
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
|
||||
{ RGB_TO_YUV(0xff0000) }, /* Red */
|
||||
};
|
||||
#endif
|
||||
|
||||
static const short kernel5[] =
|
||||
{
|
||||
@@ -476,7 +495,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
||||
* edges unblended to give distinction to macro blocks in areas
|
||||
* filled with the same color block.
|
||||
*/
|
||||
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
@@ -484,10 +503,10 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
y += stride + 2;
|
||||
for (i = 0; i < 14; i++)
|
||||
y += 2*stride + 2;
|
||||
for (i = 0; i < 12; i++)
|
||||
{
|
||||
for (j = 0; j < 14; j++)
|
||||
for (j = 0; j < 12; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
@@ -511,6 +530,104 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
}
|
||||
}
|
||||
|
||||
/* Blend a horizontal run of 'count' pixels starting at 'px' toward a
 * constant colour.
 *
 * 'alpha' is the 16.16 fixed-point weight kept from the existing pixel and
 * 'blend_const' is the target colour already multiplied by
 * ((1<<16) - alpha), so each pixel becomes
 *     (pixel*alpha + colour*((1<<16) - alpha)) >> 16.
 */
static void blend_span(unsigned char *px, int count, int alpha, int blend_const)
{
    int k;

    for (k = 0; k < count; k++)
    {
        px[k] = (px[k]*alpha + blend_const)>>16;
    }
}

/* Blend only the edge of the macro block.  Leave center
 * unblended to allow for other visualizations to be layered.
 *
 * y, u, v : top-left pixel of the macro block in each plane (16x16 luma,
 *           8x8 chroma).
 * y1,u1,v1: colour to blend toward.
 * alpha   : 16.16 fixed-point weight of the existing pixel (callers in this
 *           file pass 0xc000); presumably expected in [0, 1<<16] so the
 *           intermediate products fit in an int.
 * stride  : luma stride in bytes; the chroma stride is taken as stride>>1.
 *
 * The luma border is two pixels thick and the chroma border one pixel thick.
 */
void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
                        int y1, int u1, int v1, int alpha, int stride)
{
    int i;
    int y1_const = y1*((1<<16)-alpha);
    int u1_const = u1*((1<<16)-alpha);
    int v1_const = v1*((1<<16)-alpha);

    /* top two luma rows, full width */
    for (i = 0; i < 2; i++)
    {
        blend_span(y, 16, alpha, y1_const);
        y += stride;
    }

    /* twelve middle luma rows: two leftmost and two rightmost pixels only */
    for (i = 0; i < 12; i++)
    {
        blend_span(y, 2, alpha, y1_const);
        blend_span(y + 14, 2, alpha, y1_const);
        y += stride;
    }

    /* bottom two luma rows, full width */
    for (i = 0; i < 2; i++)
    {
        blend_span(y, 16, alpha, y1_const);
        y += stride;
    }

    /* chroma planes are addressed with half the luma stride */
    stride >>= 1;

    /* top chroma row, full width */
    blend_span(u, 8, alpha, u1_const);
    blend_span(v, 8, alpha, v1_const);
    u += stride;
    v += stride;

    /* six middle chroma rows: first and last pixel only */
    for (i = 0; i < 6; i++)
    {
        blend_span(u, 1, alpha, u1_const);
        blend_span(v, 1, alpha, v1_const);

        blend_span(u + 7, 1, alpha, u1_const);
        blend_span(v + 7, 1, alpha, v1_const);

        u += stride;
        v += stride;
    }

    /* bottom chroma row, full width */
    blend_span(u, 8, alpha, u1_const);
    blend_span(v, 8, alpha, v1_const);
}

/* Blend one whole 4x4 luma sub-block and its 2x2 chroma counterpart toward
 * a constant colour.
 *
 * y, u, v : top-left pixel of the sub-block in each plane.
 * y1,u1,v1: colour to blend toward.
 * alpha   : 16.16 fixed-point weight of the existing pixel (callers in this
 *           file pass 0xc000).
 * stride  : luma stride in bytes; the chroma stride is taken as stride>>1.
 */
void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
                        int y1, int u1, int v1, int alpha, int stride)
{
    int i;
    int y1_const = y1*((1<<16)-alpha);
    int u1_const = u1*((1<<16)-alpha);
    int v1_const = v1*((1<<16)-alpha);

    for (i = 0; i < 4; i++)
    {
        blend_span(y, 4, alpha, y1_const);
        y += stride;
    }

    /* chroma planes are addressed with half the luma stride */
    stride >>= 1;

    for (i = 0; i < 2; i++)
    {
        blend_span(u, 2, alpha, u1_const);
        blend_span(v, 2, alpha, v1_const);
        u += stride;
        v += stride;
    }
}
|
||||
|
||||
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
|
||||
{
|
||||
int dx;
|
||||
@@ -522,7 +639,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*x1 = width;
|
||||
if (dy)
|
||||
if (dx)
|
||||
*y1 = ((width-x0)*dy)/dx + y0;
|
||||
}
|
||||
if (*x1 < 0)
|
||||
@@ -531,7 +648,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*x1 = 0;
|
||||
if (dy)
|
||||
if (dx)
|
||||
*y1 = ((0-x0)*dy)/dx + y0;
|
||||
}
|
||||
if (*y1 > height)
|
||||
@@ -540,7 +657,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*y1 = height;
|
||||
if (dx)
|
||||
if (dy)
|
||||
*x1 = ((height-y0)*dx)/dy + x0;
|
||||
}
|
||||
if (*y1 < 0)
|
||||
@@ -549,7 +666,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
dy = *y1 - y0;
|
||||
|
||||
*y1 = 0;
|
||||
if (dx)
|
||||
if (dy)
|
||||
*x1 = ((0-y0)*dx)/dy + x0;
|
||||
}
|
||||
}
|
||||
@@ -561,10 +678,12 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
||||
#define RTCD_VTABLE(oci) NULL
|
||||
#endif
|
||||
|
||||
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
|
||||
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
|
||||
{
|
||||
char message[512];
|
||||
int q = oci->filter_level * 10 / 6;
|
||||
int flags = ppflags->post_proc_flag;
|
||||
int deblock_level = ppflags->deblocking_level;
|
||||
int noise_level = ppflags->noise_level;
|
||||
|
||||
if (!oci->frame_to_show)
|
||||
return -1;
|
||||
@@ -621,8 +740,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL1)
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
|
||||
{
|
||||
char message[512];
|
||||
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
|
||||
(oci->frame_type == KEY_FRAME),
|
||||
oci->refresh_golden_frame,
|
||||
@@ -633,7 +754,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL2)
|
||||
if (flags & VP8D_DEBUG_TXT_MBLK_MODES)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
@@ -665,7 +786,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL3)
|
||||
if (flags & VP8D_DEBUG_TXT_DC_DIFF)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
@@ -700,45 +821,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL4)
|
||||
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
|
||||
{
|
||||
char message[512];
|
||||
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
#if 0
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_index = 0;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (i = 0; i < mb_rows; i++)
|
||||
{
|
||||
for (j = 0; j < mb_cols; j++)
|
||||
{
|
||||
char zz[4];
|
||||
|
||||
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
|
||||
vp8_blit_text(zz, y_ptr, post->y_stride);
|
||||
mb_index ++;
|
||||
y_ptr += 16;
|
||||
}
|
||||
|
||||
mb_index ++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Draw motion vectors */
|
||||
if (flags & VP8D_DEBUG_LEVEL5)
|
||||
if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
@@ -749,29 +840,144 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
MODE_INFO *mi = oci->mi;
|
||||
int x0, y0;
|
||||
|
||||
for (y0 = 8; y0 < (height + 8); y0 += 16)
|
||||
for (y0 = 0; y0 < height; y0 += 16)
|
||||
{
|
||||
for (x0 = 8; x0 < (width + 8); x0 += 16)
|
||||
for (x0 = 0; x0 < width; x0 += 16)
|
||||
{
|
||||
int x1, y1;
|
||||
if (mi->mbmi.mode >= NEARESTMV)
|
||||
int x1, y1;
|
||||
|
||||
if (!(ppflags->display_mv_flag & (1<<mi->mbmi.mode)))
|
||||
{
|
||||
mi++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mi->mbmi.mode == SPLITMV)
|
||||
{
|
||||
switch (mi->mbmi.partitioning)
|
||||
{
|
||||
case 0 : /* mv_top_bottom */
|
||||
{
|
||||
B_MODE_INFO *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 8 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+8, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[8];
|
||||
|
||||
x1 = x0 + 8 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+8, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
case 1 : /* mv_left_right */
|
||||
{
|
||||
B_MODE_INFO *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 + 8 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+8, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[2];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 + 8 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+8, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
case 2 : /* mv_quarters */
|
||||
{
|
||||
B_MODE_INFO *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[2];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[8];
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[10];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride);
|
||||
break;
|
||||
}
|
||||
default :
|
||||
{
|
||||
B_MODE_INFO *bmi = mi->bmi;
|
||||
int bx0, by0;
|
||||
|
||||
for (by0 = y0; by0 < (y0+16); by0 += 4)
|
||||
{
|
||||
for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
|
||||
{
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = bx0 + 2 + (mv->col >> 3);
|
||||
y1 = by0 + 2 + (mv->row >> 3);
|
||||
|
||||
constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
|
||||
vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride);
|
||||
|
||||
bmi++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (mi->mbmi.mode >= NEARESTMV)
|
||||
{
|
||||
MV *mv = &mi->mbmi.mv.as_mv;
|
||||
const int lx0 = x0 + 8;
|
||||
const int ly0 = y0 + 8;
|
||||
|
||||
x1 = x0 + (mv->col >> 3);
|
||||
y1 = y0 + (mv->row >> 3);
|
||||
x1 = lx0 + (mv->col >> 3);
|
||||
y1 = ly0 + (mv->row >> 3);
|
||||
|
||||
if (x1 != x0 && y1 != y0)
|
||||
if (x1 != lx0 && y1 != ly0)
|
||||
{
|
||||
constrain_line (x0, &x1, y0-1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
|
||||
constrain_line (lx0, &x1, ly0-1, &y1, width, height);
|
||||
vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride);
|
||||
|
||||
constrain_line (x0, &x1, y0+1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
|
||||
constrain_line (lx0, &x1, ly0+1, &y1, width, height);
|
||||
vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride);
|
||||
}
|
||||
else
|
||||
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
|
||||
vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
mi++;
|
||||
@@ -779,9 +985,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
}
|
||||
|
||||
/* Color in block modes */
|
||||
if (flags & VP8D_DEBUG_LEVEL6)
|
||||
if ((flags & VP8D_DEBUG_CLR_BLK_MODES)
|
||||
&& (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag))
|
||||
{
|
||||
int i, j;
|
||||
int y, x;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
@@ -791,18 +998,54 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (i = 0; i < height; i += 16)
|
||||
for (y = 0; y < height; y += 16)
|
||||
{
|
||||
for (j = 0; j < width; j += 16)
|
||||
for (x = 0; x < width; x += 16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
if (mi->mbmi.mode == B_PRED &&
|
||||
((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag))
|
||||
{
|
||||
int by, bx;
|
||||
unsigned char *yl, *ul, *vl;
|
||||
B_MODE_INFO *bmi = mi->bmi;
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
yl = y_ptr + x;
|
||||
ul = u_ptr + (x>>1);
|
||||
vl = v_ptr + (x>>1);
|
||||
|
||||
for (by = 0; by < 16; by += 4)
|
||||
{
|
||||
for (bx = 0; bx < 16; bx += 4)
|
||||
{
|
||||
if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
|
||||
|| (ppflags->display_mb_modes_flag & B_PRED))
|
||||
{
|
||||
Y = B_PREDICTION_MODE_colors[bmi->mode][0];
|
||||
U = B_PREDICTION_MODE_colors[bmi->mode][1];
|
||||
V = B_PREDICTION_MODE_colors[bmi->mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
|
||||
(yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
bmi++;
|
||||
}
|
||||
|
||||
yl += y_stride*4;
|
||||
ul += y_stride*1;
|
||||
vl += y_stride*1;
|
||||
}
|
||||
}
|
||||
else if (ppflags->display_mb_modes_flag & (1<<mi->mbmi.mode))
|
||||
{
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
|
||||
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
@@ -815,9 +1058,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
}
|
||||
|
||||
/* Color in frame reference blocks */
|
||||
if (flags & VP8D_DEBUG_LEVEL7)
|
||||
if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag)
|
||||
{
|
||||
int i, j;
|
||||
int y, x;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
@@ -827,18 +1070,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (i = 0; i < height; i += 16)
|
||||
for (y = 0; y < height; y += 16)
|
||||
{
|
||||
for (j = 0; j < width; j +=16)
|
||||
for (x = 0; x < width; x +=16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
if (ppflags->display_ref_frame_flag & (1<<mi->mbmi.ref_frame))
|
||||
{
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
|
||||
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
@@ -849,6 +1095,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
||||
mi++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*dest = oci->post_proc_buffer;
|
||||
|
||||
|
@@ -24,7 +24,15 @@
|
||||
char whiteclamp[16], char bothclamp[16],\
|
||||
unsigned int w, unsigned int h, int pitch)
|
||||
|
||||
#define prototype_postproc_blend_mb(sym)\
|
||||
#define prototype_postproc_blend_mb_inner(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#define prototype_postproc_blend_mb_outer(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#define prototype_postproc_blend_b(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
@@ -52,22 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross);
|
||||
#endif
|
||||
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb
|
||||
#define vp8_postproc_blend_mb vp8_blend_mb_c
|
||||
#ifndef vp8_postproc_blend_mb_inner
|
||||
#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
|
||||
extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb_outer
|
||||
#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
|
||||
|
||||
#ifndef vp8_postproc_blend_b
|
||||
#define vp8_postproc_blend_b vp8_blend_b_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_b(vp8_postproc_blend_b);
|
||||
|
||||
typedef prototype_postproc((*vp8_postproc_fn_t));
|
||||
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
|
||||
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
|
||||
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
|
||||
typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
|
||||
typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
|
||||
typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
|
||||
typedef struct
|
||||
{
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
vp8_postproc_blend_mb_fn_t blend_mb;
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
vp8_postproc_blend_mb_inner_fn_t blend_mb_inner;
|
||||
vp8_postproc_blend_mb_outer_fn_t blend_mb_outer;
|
||||
vp8_postproc_blend_b_fn_t blend_b;
|
||||
} vp8_postproc_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -89,7 +111,7 @@ struct postproc_state
|
||||
#include "onyxc_int.h"
|
||||
#include "ppflags.h"
|
||||
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
|
||||
int deblock_level, int noise_level, int flags);
|
||||
vp8_ppflags_t *flags);
|
||||
|
||||
|
||||
void vp8_de_noise(YV12_BUFFER_CONFIG *source,
|
||||
|
@@ -13,17 +13,28 @@
|
||||
#define __INC_PPFLAGS_H
|
||||
enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_LEVEL1 = 1<<3,
|
||||
VP8D_DEBUG_LEVEL2 = 1<<4,
|
||||
VP8D_DEBUG_LEVEL3 = 1<<5,
|
||||
VP8D_DEBUG_LEVEL4 = 1<<6,
|
||||
VP8D_DEBUG_LEVEL5 = 1<<7,
|
||||
VP8D_DEBUG_LEVEL6 = 1<<8,
|
||||
VP8D_DEBUG_LEVEL7 = 1<<9
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
|
||||
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
|
||||
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
|
||||
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
|
||||
VP8D_DEBUG_DRAW_MV = 1<<7,
|
||||
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
|
||||
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
|
||||
};
|
||||
|
||||
/* Post-processing options handed to vp8_post_proc_frame() as a single
 * argument.
 */
typedef struct
|
||||
{
|
||||
int post_proc_flag; /* bitmask of the VP8D_* values above */
|
||||
int deblocking_level; /* read by vp8_post_proc_frame as its deblock level */
|
||||
int noise_level; /* read by vp8_post_proc_frame as its noise level */
|
||||
int display_ref_frame_flag; /* bit (1<<ref_frame) selects which reference frames get colored */
|
||||
int display_mb_modes_flag; /* bit (1<<mode) selects which macroblock modes get colored */
|
||||
int display_b_modes_flag; /* non-zero enables sub-block mode coloring inside B_PRED macroblocks */
|
||||
int display_mv_flag; /* bit (1<<mode) selects macroblock modes whose motion vectors are drawn */
|
||||
} vp8_ppflags_t;
|
||||
|
||||
#endif
|
||||
|
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc.h
|
||||
*
|
||||
* Description : simple preprocessor
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __INC_PREPROC_H
|
||||
#define __INC_PREPROC_H
|
||||
|
||||
/****************************************************************************
|
||||
* Types
|
||||
****************************************************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned char *frame_buffer;
|
||||
int frame;
|
||||
unsigned int *fixed_divide;
|
||||
|
||||
unsigned char *frame_buffer_alloc;
|
||||
unsigned int *fixed_divide_alloc;
|
||||
} pre_proc_instance;
|
||||
|
||||
/****************************************************************************
|
||||
* Functions.
|
||||
****************************************************************************/
|
||||
void pre_proc_machine_specific_config(void);
|
||||
void delete_pre_proc(pre_proc_instance *ppi);
|
||||
int init_pre_proc(pre_proc_instance *ppi, int frame_size);
|
||||
extern void spatial_filter_c(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int width, int height, int pitch, int strength);
|
||||
extern void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
|
||||
#endif
|
@@ -1,76 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc_if.h
|
||||
*
|
||||
* Description : Pre-processor interface header file.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __PREPROC_IF_H
|
||||
#define __PREPROC_IF_H
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
#include "type_aliases.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Types
|
||||
****************************************************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UINT8 *Yuv0ptr;
|
||||
UINT8 *Yuv1ptr;
|
||||
|
||||
UINT8 *frag_info; // blocks coded : passed in
|
||||
UINT32 frag_info_element_size; // size of each element
|
||||
UINT32 frag_info_coded_mask; // mask to get at whether fragment is coded
|
||||
|
||||
UINT32 *region_index; // Gives pixel index for top left of each block
|
||||
UINT32 video_frame_height;
|
||||
UINT32 video_frame_width;
|
||||
UINT8 hfrag_pixels;
|
||||
UINT8 vfrag_pixels;
|
||||
|
||||
} SCAN_CONFIG_DATA;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SCP_FILTER_ON_OFF,
|
||||
SCP_SET_SRF_OFFSET,
|
||||
SCP_SET_EBO_ON_OFF,
|
||||
SCP_SET_VCAP_LEVEL_OFFSET,
|
||||
SCP_SET_SHOW_LOCAL
|
||||
|
||||
} SCP_SETTINGS;
|
||||
|
||||
typedef struct PP_INSTANCE *x_pp_inst;
|
||||
|
||||
/****************************************************************************
|
||||
* Module statics
|
||||
****************************************************************************/
|
||||
/* Controls whether Early break out is on or off in default case */
|
||||
#define EARLY_BREAKOUT_DEFAULT TRUE
|
||||
|
||||
/****************************************************************************
|
||||
* Functions
|
||||
****************************************************************************/
|
||||
extern void set_scan_param(x_pp_inst ppi, UINT32 param_id, INT32 param_value);
|
||||
extern UINT32 yuvanalyse_frame(x_pp_inst ppi, UINT32 *KFIndicator);
|
||||
extern x_pp_inst create_pp_instance(void);
|
||||
extern void delete_pp_instance(x_pp_inst *);
|
||||
extern BOOL scan_yuvinit(x_pp_inst, SCAN_CONFIG_DATA *scan_config_ptr);
|
||||
|
||||
#endif
|
@@ -206,4 +206,29 @@ static int vp8_decode_value(BOOL_DECODER *br, int bits)
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
/* Report whether the boolean decoder has consumed bits past the end of its
 * input buffer.  Returns 1 if it has, 0 otherwise.
 */
static int vp8dx_bool_error(BOOL_DECODER *br)
{
    /* 'count' holds the number of bits in the 'value' buffer, minus 8
     * (count == 8 means 16 bits are available).  A normal refill adds 8 to
     * count along with one byte of data; once the buffer is exhausted the
     * refill adds VP8_LOTS_OF_BITS instead, 8 of which are the final real
     * bits of the stream.  The last real bit therefore corresponds to
     * count == VP8_LOTS_OF_BITS - 16, and any count in the range
     * (VP8_BD_VALUE_SIZE, VP8_LOTS_OF_BITS - 16] means the decoder has been
     * asked for bits after the end of the stream.
     */
    const int read_past_end = (br->count > VP8_BD_VALUE_SIZE)
                              && (br->count <= VP8_LOTS_OF_BITS - 16);

    return read_past_end ? 1 : 0;
}
|
||||
#endif
|
||||
|
@@ -19,7 +19,7 @@
|
||||
extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
|
||||
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
|
||||
extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
|
||||
extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
|
||||
extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
|
||||
extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
|
||||
#endif
|
||||
|
||||
|
@@ -381,6 +381,12 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
|
||||
xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
|
||||
xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
|
||||
|
||||
if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
/* propagate errors from reference frames */
|
||||
xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
|
||||
}
|
||||
|
||||
vp8_build_uvmvs(xd, pc->full_pixel);
|
||||
|
||||
/*
|
||||
@@ -391,6 +397,8 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
|
||||
*/
|
||||
vp8_decode_macroblock(pbi, xd);
|
||||
|
||||
/* check if the boolean decoder has suffered an error */
|
||||
xd->corrupted |= vp8dx_bool_error(xd->current_bc);
|
||||
|
||||
recon_yoffset += 16;
|
||||
recon_uvoffset += 8;
|
||||
@@ -461,7 +469,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
|
||||
partition_size = user_data_end - partition;
|
||||
}
|
||||
|
||||
if (user_data_end - partition < partition_size)
|
||||
if (partition + partition_size > user_data_end
|
||||
|| partition + partition_size < partition)
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Truncated packet or corrupt partition "
|
||||
"%d length", i + 1);
|
||||
@@ -484,7 +493,6 @@ static void setup_token_decoder(VP8D_COMP *pbi,
|
||||
|
||||
static void stop_token_decoder(VP8D_COMP *pbi)
|
||||
{
|
||||
int i;
|
||||
VP8_COMMON *pc = &pbi->common;
|
||||
|
||||
if (pc->multi_token_partition != ONE_PARTITION)
|
||||
@@ -555,6 +563,7 @@ static void init_frame(VP8D_COMP *pbi)
|
||||
xd->frame_type = pc->frame_type;
|
||||
xd->mode_info_context->mbmi.mode = DC_PRED;
|
||||
xd->mode_info_stride = pc->mode_info_stride;
|
||||
xd->corrupted = 0; /* init without corruption */
|
||||
}
|
||||
|
||||
int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
@@ -570,6 +579,10 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
int i, j, k, l;
|
||||
const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
|
||||
|
||||
/* start with no corruption of current frame */
|
||||
xd->corrupted = 0;
|
||||
pc->yv12_fb[pc->new_fb_idx].corrupted = 0;
|
||||
|
||||
if (data_end - data < 3)
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Truncated packet");
|
||||
@@ -580,7 +593,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
(data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
|
||||
data += 3;
|
||||
|
||||
if (data_end - data < first_partition_length_in_bytes)
|
||||
if (data + first_partition_length_in_bytes > data_end
|
||||
|| data + first_partition_length_in_bytes < data)
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Truncated packet or corrupt partition 0 length");
|
||||
vp8_setup_version(pc);
|
||||
@@ -890,6 +904,14 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
|
||||
stop_token_decoder(pbi);
|
||||
|
||||
/* Collect information about decoder corruption. */
|
||||
/* 1. Check first boolean decoder for errors. */
|
||||
pc->yv12_fb[pc->new_fb_idx].corrupted =
|
||||
vp8dx_bool_error(bc);
|
||||
/* 2. Check the macroblock information */
|
||||
pc->yv12_fb[pc->new_fb_idx].corrupted |=
|
||||
xd->corrupted;
|
||||
|
||||
/* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */
|
||||
|
||||
/* If this was a kf or Gf note the Q used */
|
||||
|
@@ -254,12 +254,7 @@ static void ref_cnt_fb (int *buf, int *idx, int new_idx)
|
||||
/* If any buffer copy / swapping is signalled it should be done here. */
|
||||
static int swap_frame_buffers (VP8_COMMON *cm)
|
||||
{
|
||||
int fb_to_update_with, err = 0;
|
||||
|
||||
if (cm->refresh_last_frame)
|
||||
fb_to_update_with = cm->lst_fb_idx;
|
||||
else
|
||||
fb_to_update_with = cm->new_fb_idx;
|
||||
int err = 0;
|
||||
|
||||
/* The alternate reference frame or golden frame can be updated
|
||||
* using the new, last, or golden/alt ref frame. If it
|
||||
@@ -271,7 +266,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
|
||||
int new_fb = 0;
|
||||
|
||||
if (cm->copy_buffer_to_arf == 1)
|
||||
new_fb = fb_to_update_with;
|
||||
new_fb = cm->lst_fb_idx;
|
||||
else if (cm->copy_buffer_to_arf == 2)
|
||||
new_fb = cm->gld_fb_idx;
|
||||
else
|
||||
@@ -285,7 +280,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
|
||||
int new_fb = 0;
|
||||
|
||||
if (cm->copy_buffer_to_gf == 1)
|
||||
new_fb = fb_to_update_with;
|
||||
new_fb = cm->lst_fb_idx;
|
||||
else if (cm->copy_buffer_to_gf == 2)
|
||||
new_fb = cm->alt_fb_idx;
|
||||
else
|
||||
@@ -334,6 +329,23 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
|
||||
pbi->common.error.error_code = VPX_CODEC_OK;
|
||||
|
||||
if (size == 0)
|
||||
{
|
||||
/* This is used to signal that we are missing frames.
|
||||
* We do not know if the missing frame(s) was supposed to update
|
||||
* any of the reference buffers, but we act conservative and
|
||||
* mark only the last buffer as corrupted.
|
||||
*/
|
||||
cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
|
||||
|
||||
/* Signal that we have no frame to show. */
|
||||
cm->show_frame = 0;
|
||||
|
||||
/* Nothing more to do. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (cm->rtcd.flags & HAS_NEON)
|
||||
@@ -356,6 +368,13 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
}
|
||||
#endif
|
||||
pbi->common.error.setjmp = 0;
|
||||
|
||||
/* We do not know if the missing frame(s) was supposed to update
|
||||
* any of the reference buffers, but we act conservative and
|
||||
* mark only the last buffer as corrupted.
|
||||
*/
|
||||
cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
|
||||
|
||||
if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
|
||||
cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
|
||||
return -1;
|
||||
@@ -388,6 +407,16 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
return retcode;
|
||||
}
|
||||
|
||||
/* copy mode info to storage for future error concealment */
|
||||
if (pbi->common.prev_mip)
|
||||
{
|
||||
/* size allocated in vp8_alloc_frame_buffers() */
|
||||
int size_of_mip = (pbi->common.mb_cols + 1) * (pbi->common.mb_rows + 1)
|
||||
* sizeof(MODE_INFO);
|
||||
|
||||
memcpy(pbi->common.prev_mip, pbi->common.mip, size_of_mip);
|
||||
}
|
||||
|
||||
if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
|
||||
{
|
||||
if (swap_frame_buffers (cm))
|
||||
@@ -506,7 +535,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
|
||||
pbi->common.error.setjmp = 0;
|
||||
return retcode;
|
||||
}
|
||||
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
|
||||
int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags)
|
||||
{
|
||||
int ret = -1;
|
||||
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
|
||||
@@ -524,7 +553,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
|
||||
|
||||
sd->clrtype = pbi->common.clr_type;
|
||||
#if CONFIG_POSTPROC
|
||||
ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
|
||||
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
|
||||
#else
|
||||
|
||||
if (pbi->common.frame_to_show)
|
||||
|
@@ -451,7 +451,6 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
|
||||
#if CONFIG_MULTITHREAD
|
||||
int core_count = 0;
|
||||
int ithread;
|
||||
int i;
|
||||
|
||||
pbi->b_multithreaded_rd = 0;
|
||||
pbi->allocated_decoding_thread_count = 0;
|
||||
@@ -596,7 +595,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
|
||||
}
|
||||
|
||||
|
||||
int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
@@ -647,7 +646,6 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
|
||||
for (i=0; i< pc->mb_rows; i++)
|
||||
CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
(void) pbi;
|
||||
(void) width;
|
||||
@@ -722,7 +720,6 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
|
||||
/*int mb_row;
|
||||
int mb_col;
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];*/
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
|
||||
int i;
|
||||
@@ -770,7 +767,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
int ibc = 0;
|
||||
int num_part = 1 << pbi->common.multi_token_partition;
|
||||
int i, j;
|
||||
int i;
|
||||
volatile int *last_row_current_mb_col = NULL;
|
||||
int nsync = pbi->sync_range;
|
||||
|
||||
@@ -810,7 +807,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
|
||||
for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
|
||||
{
|
||||
int i;
|
||||
|
||||
xd->current_bc = &pbi->mbc[mb_row%num_part];
|
||||
|
||||
@@ -894,9 +890,18 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
|
||||
xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
|
||||
xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
|
||||
|
||||
if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
/* propagate errors from reference frames */
|
||||
xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
|
||||
}
|
||||
|
||||
vp8_build_uvmvs(xd, pc->full_pixel);
|
||||
vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
|
||||
|
||||
/* check if the boolean decoder has suffered an error */
|
||||
xd->corrupted |= vp8dx_bool_error(xd->current_bc);
|
||||
|
||||
if (pbi->common.filter_level)
|
||||
{
|
||||
/* Save decoded MB last row data for next-row decoding */
|
||||
|
@@ -29,10 +29,9 @@
|
||||
push {r4-r11, lr}
|
||||
|
||||
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
|
||||
; sizeof (TOKENEXTRA) is 20
|
||||
add r2, r2, r2, lsl #2 ; xcount
|
||||
; sizeof (TOKENEXTRA) is 8
|
||||
sub sp, sp, #12
|
||||
add r2, r1, r2, lsl #2 ; stop = p + xcount
|
||||
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
|
||||
str r2, [sp, #0]
|
||||
str r3, [sp, #8] ; save vp8_coef_encodings
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
@@ -41,13 +40,13 @@
|
||||
b check_p_lt_stop
|
||||
|
||||
while_p_lt_stop
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #8] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldr r7, [r1, #tokenextra_skip_eob_node]
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
@@ -142,12 +141,11 @@ token_count_lt_zero
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #48] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 20
|
||||
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
@@ -155,7 +153,7 @@ token_count_lt_zero
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
|
@@ -62,13 +62,13 @@ mb_row_loop
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #20] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldr r7, [r1, #tokenextra_skip_eob_node]
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
@@ -163,12 +163,11 @@ token_count_lt_zero
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #8] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 20
|
||||
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
@@ -176,7 +175,7 @@ token_count_lt_zero
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
|
@@ -90,13 +90,13 @@ mb_row_loop
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #80] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldr r7, [r1, #tokenextra_skip_eob_node]
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
@@ -191,12 +191,11 @@ token_count_lt_zero
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldr r6, [r1, #tokenextra_token] ; t
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #84] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 20
|
||||
add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
@@ -204,7 +203,7 @@ token_count_lt_zero
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldr lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
|
@@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
|
||||
|
||||
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
|
||||
d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -51,7 +51,6 @@ DEFINE(vp8_token_len, offsetof(vp8_token, Len));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
|
||||
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
|
||||
DEFINE(vp8_extra_bit_struct_prob_bc, offsetof(vp8_extra_bit_struct, prob_bc));
|
||||
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
|
||||
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
|
||||
|
||||
@@ -67,8 +66,8 @@ DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
// These two sizes are used in vp7cx_pack_tokens. They are hard coded
|
||||
// so if the size changes this will have to be adjusted.
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 20)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 20)
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
||||
|
@@ -33,6 +33,7 @@ typedef struct
|
||||
|
||||
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
|
||||
short *quant;
|
||||
short *quant_fast;
|
||||
short *quant_shift;
|
||||
short *zbin;
|
||||
short *zrun_zbin_boost;
|
||||
@@ -81,6 +82,7 @@ typedef struct
|
||||
int errthresh;
|
||||
int rddiv;
|
||||
int rdmult;
|
||||
INT64 activity_sum;
|
||||
|
||||
int mvcosts[2][MVvals+1];
|
||||
int *mvcost[2];
|
||||
|
@@ -62,7 +62,6 @@ unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
static const int qrounding_factors[129] =
|
||||
{
|
||||
56, 56, 56, 56, 48, 48, 56, 56,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
@@ -78,12 +77,18 @@ static const int qrounding_factors[129] =
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48
|
||||
};
|
||||
|
||||
static const int qzbin_factors[129] =
|
||||
{
|
||||
72, 72, 72, 72, 80, 80, 72, 72,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
@@ -94,17 +99,11 @@ static const int qzbin_factors[129] =
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80,
|
||||
80
|
||||
};
|
||||
|
||||
static const int qrounding_factors_y2[129] =
|
||||
{
|
||||
56, 56, 56, 56, 48, 48, 56, 56,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
@@ -120,12 +119,18 @@ static const int qrounding_factors_y2[129] =
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48,
|
||||
48, 48, 48, 48, 48, 48, 48, 48,
|
||||
48
|
||||
};
|
||||
|
||||
static const int qzbin_factors_y2[129] =
|
||||
{
|
||||
72, 72, 72, 72, 80, 80, 72, 72,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
84, 84, 84, 84, 84, 84, 84, 84,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
@@ -136,26 +141,30 @@ static const int qzbin_factors_y2[129] =
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80, 80, 80, 80, 80, 80, 80, 80,
|
||||
80,
|
||||
80
|
||||
};
|
||||
|
||||
//#define EXACT_QUANT
|
||||
#define EXACT_QUANT
|
||||
#ifdef EXACT_QUANT
|
||||
static void vp8cx_invert_quant(short *quant, short *shift, short d)
|
||||
static void vp8cx_invert_quant(int improved_quant, short *quant,
|
||||
short *shift, short d)
|
||||
{
|
||||
unsigned t;
|
||||
int l;
|
||||
t = d;
|
||||
for(l = 0; t > 1; l++)
|
||||
t>>=1;
|
||||
t = 1 + (1<<(16+l))/d;
|
||||
*quant = (short)(t - (1<<16));
|
||||
*shift = l;
|
||||
if(improved_quant)
|
||||
{
|
||||
unsigned t;
|
||||
int l;
|
||||
t = d;
|
||||
for(l = 0; t > 1; l++)
|
||||
t>>=1;
|
||||
t = 1 + (1<<(16+l))/d;
|
||||
*quant = (short)(t - (1<<16));
|
||||
*shift = l;
|
||||
}
|
||||
else
|
||||
{
|
||||
*quant = (1 << 16) / d;
|
||||
*shift = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
@@ -170,7 +179,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
{
|
||||
// dc values
|
||||
quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
|
||||
vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
|
||||
cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
|
||||
cpi->Y1quant_shift[Q] + 0, quant_val);
|
||||
cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -178,7 +188,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
||||
|
||||
quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
|
||||
vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
|
||||
cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
|
||||
cpi->Y2quant_shift[Q] + 0, quant_val);
|
||||
cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
|
||||
@@ -186,7 +197,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
|
||||
|
||||
quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
|
||||
vp8cx_invert_quant(cpi->UVquant[Q] + 0,
|
||||
cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
|
||||
cpi->UVquant_shift[Q] + 0, quant_val);
|
||||
cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
|
||||
cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -199,7 +211,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
int rc = vp8_default_zig_zag1d[i];
|
||||
|
||||
quant_val = vp8_ac_yquant(Q);
|
||||
vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
|
||||
cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
|
||||
cpi->Y1quant_shift[Q] + rc, quant_val);
|
||||
cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -207,7 +220,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
||||
|
||||
quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
|
||||
vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
|
||||
cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
|
||||
cpi->Y2quant_shift[Q] + rc, quant_val);
|
||||
cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
|
||||
cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
|
||||
@@ -215,7 +229,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
|
||||
cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
|
||||
|
||||
quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
|
||||
vp8cx_invert_quant(cpi->UVquant[Q] + rc,
|
||||
cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
|
||||
vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
|
||||
cpi->UVquant_shift[Q] + rc, quant_val);
|
||||
cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
|
||||
cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
|
||||
@@ -316,6 +331,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
x->block[i].quant = cpi->Y1quant[QIndex];
|
||||
x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
|
||||
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
|
||||
x->block[i].zbin = cpi->Y1zbin[QIndex];
|
||||
x->block[i].round = cpi->Y1round[QIndex];
|
||||
@@ -330,6 +346,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
for (i = 16; i < 24; i++)
|
||||
{
|
||||
x->block[i].quant = cpi->UVquant[QIndex];
|
||||
x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
|
||||
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
|
||||
x->block[i].zbin = cpi->UVzbin[QIndex];
|
||||
x->block[i].round = cpi->UVround[QIndex];
|
||||
@@ -340,6 +357,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
|
||||
// Y2
|
||||
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
|
||||
x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
|
||||
x->block[24].quant = cpi->Y2quant[QIndex];
|
||||
x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
|
||||
x->block[24].zbin = cpi->Y2zbin[QIndex];
|
||||
@@ -351,6 +369,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
|
||||
|
||||
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
|
||||
{
|
||||
// Clear Zbin mode boost for default case
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
// vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
|
||||
// when these values are not all zero.
|
||||
if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
|
||||
@@ -363,6 +384,61 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
|
||||
/* activity_avg must be positive, or flat regions could get a zero weight
|
||||
* (infinite lambda), which confounds analysis.
|
||||
* This also avoids the need for divide by zero checks in
|
||||
* vp8_activity_masking().
|
||||
*/
|
||||
#define VP8_ACTIVITY_AVG_MIN (64)
|
||||
|
||||
/* This is used as a reference when computing the source variance for the
|
||||
* purposes of activity masking.
|
||||
* Eventually this should be replaced by custom no-reference routines,
|
||||
* which will be faster.
|
||||
*/
|
||||
static const unsigned char VP8_VAR_OFFS[16]=
|
||||
{
|
||||
128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
|
||||
};
|
||||
|
||||
/* Measure the activity of the current macroblock's luma source and scale
 * the macroblock's RD multiplier (x->rdmult) by it.
 *
 * cpi  encoder instance; supplies the variance function table and the
 *      running activity average (cpi->activity_avg).
 * x    macroblock being encoded; x->src is read and x->rdmult is updated
 *      in place.
 *
 * Returns the activity value computed for this macroblock.
 */
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    /* Activities below flat_limit are treated as flat regions and are
     * clamped so that they never exceed flat_cap.
     */
    const unsigned int flat_limit = 8 << 12;
    const unsigned int flat_cap = 5 << 12;
    unsigned int sse;
    int sum;
    unsigned int activity;
    unsigned int num_weight;
    unsigned int den_weight;

    /* TODO: This could also be done over smaller areas (8x8), but that
     * would require extensive changes elsewhere, as lambda is assumed to be
     * fixed over an entire MB in most of the code.
     * Another option is to compute four 8x8 variances, and pick a single
     * lambda using a non-linear combination (e.g., the smallest, or second
     * smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
        x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);

    /* This requires a full 32 bits of precision. */
    activity = (sse << 8) - sum * sum;

    /* Drop 4 bits (with rounding) to leave some headroom. */
    activity = (activity + 8) >> 4;

    /* If the region is flat, lower the activity some more. */
    if (activity < flat_limit && activity > flat_cap)
        activity = flat_cap;

    /* TODO: For non-flat regions, edge regions should receive less masking
     * than textured regions, but identifying edge regions quickly and
     * reliably enough is still a subject of experimentation.
     * This will be most noticeable near edges with a complex shape (e.g.,
     * text), but the 4x4 transform size should make this less of a problem
     * than it would be for an 8x8 transform.
     */

    /* Apply the masking to the RD multiplier:
     * rdmult = rdmult * num_weight / den_weight, rounded to nearest.
     */
    den_weight = activity + 4 * cpi->activity_avg;
    num_weight = 4 * activity + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult * num_weight
                                + (den_weight >> 1)) / den_weight);

    return activity;
}
|
||||
|
||||
|
||||
|
||||
static
|
||||
void encode_mb_row(VP8_COMP *cpi,
|
||||
@@ -374,6 +450,7 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
int *segment_counts,
|
||||
int *totalrate)
|
||||
{
|
||||
INT64 activity_sum = 0;
|
||||
int i;
|
||||
int recon_yoffset, recon_uvoffset;
|
||||
int mb_col;
|
||||
@@ -402,14 +479,14 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// Set up limit values for vertical motion vector components
|
||||
// to prevent them extending beyond the UMV borders
|
||||
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
|
||||
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
|
||||
+ (VP8BORDERINPIXELS - 16);
|
||||
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
// Distance of Mb to the left & right edges, specified in
|
||||
// 1/8th pel units as they are always compared to values
|
||||
// Distance of Mb to the left & right edges, specified in
|
||||
// 1/8th pel units as they are always compared to values
|
||||
// that are in 1/8th pel units
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
|
||||
@@ -417,7 +494,7 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// Set up limit values for horizontal motion vector components
|
||||
// to prevent them extending beyond the UMV borders
|
||||
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
|
||||
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
|
||||
+ (VP8BORDERINPIXELS - 16);
|
||||
|
||||
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
|
||||
@@ -425,6 +502,12 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
// Is segmentation enabled
|
||||
// MB level adjustment to quantizer
|
||||
if (xd->segmentation_enabled)
|
||||
@@ -531,6 +614,7 @@ void encode_mb_row(VP8_COMP *cpi,
|
||||
// this is to account for the border
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
}
|
||||
|
||||
|
||||
@@ -544,7 +628,6 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
VP8_COMMON *const cm = & cpi->common;
|
||||
MACROBLOCKD *const xd = & x->e_mbd;
|
||||
|
||||
int i;
|
||||
TOKENEXTRA *tp = cpi->tok;
|
||||
int segment_counts[MAX_MB_SEGMENTS];
|
||||
int totalrate;
|
||||
@@ -627,9 +710,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
}
|
||||
|
||||
vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
|
||||
//vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) );
|
||||
vp8cx_initialize_me_consts(cpi, cm->base_qindex);
|
||||
//vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex);
|
||||
|
||||
// Copy data over into macro block data structures.
|
||||
|
||||
@@ -647,22 +728,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
|
||||
vp8_setup_block_ptrs(x);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
#if 0
|
||||
// Experimental rd code
|
||||
// 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics
|
||||
// such as cpi->rate_correction_factor that indicate relative complexity.
|
||||
/*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) )
|
||||
{
|
||||
//x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb;
|
||||
x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor);
|
||||
}
|
||||
else
|
||||
x->rdmult = cpi->RDMULT; */
|
||||
//x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
|
||||
#endif
|
||||
x->activity_sum = 0;
|
||||
|
||||
xd->mode_info_context->mbmi.mode = DC_PRED;
|
||||
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
|
||||
@@ -703,11 +769,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
else
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
int i;
|
||||
|
||||
vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
|
||||
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
|
||||
{
|
||||
int i;
|
||||
cpi->current_mb_col_main = -1;
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
@@ -785,6 +852,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
totalrate += cpi->mb_row_ei[i].totalrate;
|
||||
}
|
||||
|
||||
for (i = 0; i < cpi->encoding_thread_count; i++)
|
||||
{
|
||||
x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
@@ -920,6 +992,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
|
||||
cpi->last_frame_distortion = cpi->frame_distortion;
|
||||
#endif
|
||||
|
||||
/* Update the average activity for the next frame.
|
||||
* This is feed-forward for now; it could also be saved in two-pass, or
|
||||
* done during lookahead when that is eventually added.
|
||||
*/
|
||||
cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
|
||||
if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
|
||||
cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
|
||||
|
||||
}
|
||||
void vp8_setup_block_ptrs(MACROBLOCK *x)
|
||||
{
|
||||
@@ -1181,7 +1261,18 @@ int vp8cx_encode_inter_macroblock
|
||||
|
||||
if (cpi->sf.RD)
|
||||
{
|
||||
/* Are we using the fast quantizer for the mode selection? */
|
||||
if(cpi->sf.use_fastquant_for_pick)
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
|
||||
|
||||
inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
|
||||
|
||||
/* switch back to the regular quantizer for the encode */
|
||||
if (cpi->sf.improved_quant)
|
||||
{
|
||||
cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -1214,11 +1305,25 @@ int vp8cx_encode_inter_macroblock
|
||||
// Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
|
||||
if (cpi->zbin_mode_boost_enabled)
|
||||
{
|
||||
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
|
||||
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
|
||||
if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
|
||||
cpi->zbin_mode_boost = 0;
|
||||
else
|
||||
cpi->zbin_mode_boost = 0;
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.mode == ZEROMV)
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
|
||||
cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
|
||||
else
|
||||
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
|
||||
}
|
||||
else if (xd->mode_info_context->mbmi.mode == SPLITMV)
|
||||
cpi->zbin_mode_boost = 0;
|
||||
else
|
||||
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
|
||||
}
|
||||
}
|
||||
else
|
||||
cpi->zbin_mode_boost = 0;
|
||||
|
||||
vp8cx_mb_init_quantizer(cpi, x);
|
||||
}
|
||||
|
@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
#if 1
|
||||
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
|
||||
if (x->optimize)
|
||||
vp8_optimize_mby(x, rtcd);
|
||||
|
||||
#endif
|
||||
|
@@ -243,9 +243,9 @@ struct vp8_token_state{
|
||||
};
|
||||
|
||||
// TODO: experiments to find optimal multiple numbers
|
||||
#define Y1_RD_MULT 1
|
||||
#define UV_RD_MULT 1
|
||||
#define Y2_RD_MULT 4
|
||||
#define Y1_RD_MULT 4
|
||||
#define UV_RD_MULT 2
|
||||
#define Y2_RD_MULT 16
|
||||
|
||||
static const int plane_rd_mult[4]=
|
||||
{
|
||||
@@ -273,7 +273,6 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
|
||||
int x;
|
||||
int sz;
|
||||
int next;
|
||||
int path;
|
||||
int rdmult;
|
||||
int rddiv;
|
||||
int final_eob;
|
||||
@@ -309,8 +308,10 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
|
||||
eob = d->eob;
|
||||
|
||||
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
|
||||
/* TODO: These should vary with the block type, since the quantizer does. */
|
||||
rdmult = (mb->rdmult << 2)*err_mult;
|
||||
rdmult = mb->rdmult * err_mult;
|
||||
if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
|
||||
rdmult = (rdmult * 9)>>4;
|
||||
|
||||
rddiv = mb->rddiv;
|
||||
best_mask[0] = best_mask[1] = 0;
|
||||
/* Initialize the sentinel node of the trellis. */
|
||||
@@ -633,7 +634,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
|
||||
vp8_quantize_mb(x);
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
|
||||
if (x->optimize)
|
||||
vp8_optimize_mb(x, rtcd);
|
||||
#endif
|
||||
|
||||
|
@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
|
||||
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
|
||||
volatile int *last_row_current_mb_col;
|
||||
INT64 activity_sum = 0;
|
||||
|
||||
if (ithread > 0)
|
||||
last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
|
||||
@@ -111,6 +112,12 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
|
||||
xd->left_available = (mb_col != 0);
|
||||
|
||||
x->rddiv = cpi->RDDIV;
|
||||
x->rdmult = cpi->RDMULT;
|
||||
|
||||
if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
|
||||
activity_sum += vp8_activity_masking(cpi, x);
|
||||
|
||||
// Is segmentation enabled
|
||||
// MB level adjustment to quantizer
|
||||
if (xd->segmentation_enabled)
|
||||
@@ -126,6 +133,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
else
|
||||
xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default
|
||||
|
||||
x->active_ptr = cpi->active_map + seg_map_index + mb_col;
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
{
|
||||
@@ -157,8 +165,28 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
cpi->inter_zz_count ++;
|
||||
|
||||
}
|
||||
// Special case code for cyclic refresh
|
||||
// If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
|
||||
// during vp8cx_encode_inter_macroblock()) back into the global segmentation map
|
||||
if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
|
||||
{
|
||||
cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
|
||||
|
||||
// If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
|
||||
// Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
|
||||
// else mark it as dirty (1).
|
||||
if (xd->mode_info_context->mbmi.segment_id)
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
|
||||
else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
|
||||
{
|
||||
if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
|
||||
}
|
||||
else
|
||||
cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
|
||||
|
||||
}
|
||||
}
|
||||
cpi->tplist[mb_row].stop = *tp;
|
||||
|
||||
x->gf_active_ptr++; // Increment pointer into gf usage flags structure for next mb
|
||||
@@ -197,6 +225,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
||||
// this is to account for the border
|
||||
xd->mode_info_context++;
|
||||
x->partition_info++;
|
||||
x->activity_sum += activity_sum;
|
||||
|
||||
x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
|
||||
x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
|
||||
@@ -240,8 +269,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
z->sadperbit16 = x->sadperbit16;
|
||||
z->sadperbit4 = x->sadperbit4;
|
||||
z->errthresh = x->errthresh;
|
||||
z->rddiv = x->rddiv;
|
||||
z->rdmult = x->rdmult;
|
||||
|
||||
/*
|
||||
z->mv_col_min = x->mv_col_min;
|
||||
@@ -255,6 +282,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4;
|
||||
z->short_walsh4x4 = x->short_walsh4x4;
|
||||
z->quantize_b = x->quantize_b;
|
||||
z->optimize = x->optimize;
|
||||
|
||||
/*
|
||||
z->mvc = x->mvc;
|
||||
@@ -282,6 +310,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
|
||||
for (i = 0; i < 25; i++)
|
||||
{
|
||||
z->block[i].quant = x->block[i].quant;
|
||||
z->block[i].quant_fast = x->block[i].quant_fast;
|
||||
z->block[i].quant_shift = x->block[i].quant_shift;
|
||||
z->block[i].zbin = x->block[i].zbin;
|
||||
z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
|
||||
@@ -392,8 +421,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
|
||||
|
||||
vp8_setup_block_ptrs(mb);
|
||||
|
||||
mb->rddiv = cpi->RDDIV;
|
||||
mb->rdmult = cpi->RDMULT;
|
||||
mb->activity_sum = 0;
|
||||
|
||||
mbd->left_context = &cm->left_context;
|
||||
mb->mvc = cm->fc.mvc;
|
||||
|
@@ -53,8 +53,11 @@ extern const int vp8_gf_boost_qadjustment[QINDEX_RANGE];
|
||||
#define IIFACTOR 1.4
|
||||
#define IIKFACTOR1 1.40
|
||||
#define IIKFACTOR2 1.5
|
||||
#define RMAX 14.0
|
||||
#define GF_RMAX 48.0 // 128.0
|
||||
#define RMAX 14.0
|
||||
#define GF_RMAX 48.0
|
||||
|
||||
#define KF_MB_INTRA_MIN 300
|
||||
#define GF_MB_INTRA_MIN 200
|
||||
|
||||
#define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001)
|
||||
|
||||
@@ -65,6 +68,18 @@ static int vscale_lookup[7] = {0, 1, 1, 2, 2, 3, 3};
|
||||
static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3};
|
||||
|
||||
|
||||
// Lookup table with one entry per quantizer index (QINDEX_RANGE entries).
// Values are non-decreasing and run from 0 to 100; estimate_cq() returns
// cq_level[Q] for the Q it selects.
const int cq_level[QINDEX_RANGE] =
{
     0,  0,  1,  1,  2,  3,  3,  4,  4,  5,  6,  6,  7,  8,  8,  9,
     9, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20,
    20, 21, 22, 22, 23, 24, 24, 25, 26, 27, 27, 28, 29, 30, 30, 31,
    32, 33, 33, 34, 35, 36, 36, 37, 38, 39, 39, 40, 41, 42, 42, 43,
    44, 45, 46, 46, 47, 48, 49, 50, 50, 51, 52, 53, 54, 55, 55, 56,
    57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70,
    71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
    86, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100
};
|
||||
|
||||
void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame);
|
||||
int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps);
|
||||
|
||||
@@ -247,7 +262,6 @@ extern size_t vp8_firstpass_stats_sz(unsigned int mb_count)
|
||||
* macroblock.
|
||||
*/
|
||||
size_t stats_sz;
|
||||
FIRSTPASS_STATS stats;
|
||||
|
||||
stats_sz = sizeof(FIRSTPASS_STATS) + mb_count;
|
||||
stats_sz = (stats_sz + 7) & ~7;
|
||||
@@ -374,8 +388,6 @@ unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi)
|
||||
}
|
||||
void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos)
|
||||
{
|
||||
int Offset;
|
||||
|
||||
cpi->fp_motion_map_stats = target_pos;
|
||||
}
|
||||
|
||||
@@ -472,7 +484,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
|
||||
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
|
||||
|
||||
// Initial step/diamond search centred on best mv
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
|
||||
if ( tmp_err < INT_MAX-new_mv_mode_penalty )
|
||||
tmp_err += new_mv_mode_penalty;
|
||||
|
||||
@@ -495,7 +507,7 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost);
|
||||
tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv);
|
||||
if ( tmp_err < INT_MAX-new_mv_mode_penalty )
|
||||
tmp_err += new_mv_mode_penalty;
|
||||
|
||||
@@ -905,7 +917,7 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
|
||||
double pow_lowq = 0.40;
|
||||
|
||||
if (section_target_bandwitdh <= 0)
|
||||
return MAXQ;
|
||||
return cpi->maxq_max_limit; // Highest value allowed
|
||||
|
||||
target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs);
|
||||
|
||||
@@ -941,10 +953,12 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
|
||||
|
||||
// Correction factor used for Q values >= 20
|
||||
corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq);
|
||||
corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high;
|
||||
corr_high = (corr_high < 0.05)
|
||||
? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high;
|
||||
|
||||
// Try and pick a Q that should be high enough to encode the content at the given rate.
|
||||
for (Q = 0; Q < MAXQ; Q++)
|
||||
// Try and pick a max Q that will be high enough to encode the
|
||||
// content at the given rate.
|
||||
for (Q = cpi->maxq_min_limit; Q < cpi->maxq_max_limit; Q++)
|
||||
{
|
||||
int bits_per_mb_at_this_q;
|
||||
|
||||
@@ -963,6 +977,28 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_
|
||||
break;
|
||||
}
|
||||
|
||||
// Restriction on active max q for constrained quality mode.
|
||||
if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
|
||||
(Q < cpi->cq_target_quality) )
|
||||
//(Q < cpi->oxcf.cq_level;) )
|
||||
{
|
||||
Q = cpi->cq_target_quality;
|
||||
//Q = cpi->oxcf.cq_level;
|
||||
}
|
||||
|
||||
// Adjust maxq_min_limit and maxq_max_limit limits based on
|
||||
// average q observed in clip for non kf/gf/arf frames
|
||||
// Give average a chance to settle though.
|
||||
if ( (cpi->ni_frames >
|
||||
((unsigned int)cpi->total_stats->count >> 8)) &&
|
||||
(cpi->ni_frames > 150) )
|
||||
{
|
||||
cpi->maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality)
|
||||
? (cpi->ni_av_qi + 32) : cpi->worst_quality;
|
||||
cpi->maxq_min_limit = ((cpi->ni_av_qi - 32) > cpi->best_quality)
|
||||
? (cpi->ni_av_qi - 32) : cpi->best_quality;
|
||||
}
|
||||
|
||||
return Q;
|
||||
}
|
||||
static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width)
|
||||
@@ -1111,6 +1147,79 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
|
||||
|
||||
return Q;
|
||||
}
|
||||
|
||||
// For cq mode estimate a cq level that matches the observed
|
||||
// complexity and data rate.
|
||||
static int estimate_cq(VP8_COMP *cpi, double section_err,
|
||||
int section_target_bandwitdh, int Height, int Width)
|
||||
{
|
||||
int Q;
|
||||
int num_mbs = ((Height * Width) / (16 * 16));
|
||||
int target_norm_bits_per_mb;
|
||||
|
||||
double err_per_mb = section_err / num_mbs;
|
||||
double correction_factor;
|
||||
double corr_high;
|
||||
double speed_correction = 1.0;
|
||||
double pow_highq = 0.90;
|
||||
double pow_lowq = 0.40;
|
||||
double clip_iiratio;
|
||||
double clip_iifactor;
|
||||
|
||||
target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))
|
||||
? (512 * section_target_bandwitdh) / num_mbs
|
||||
: 512 * (section_target_bandwitdh / num_mbs);
|
||||
|
||||
// Corrections for higher compression speed settings
|
||||
// (reduced compression expected)
|
||||
if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1))
|
||||
{
|
||||
if (cpi->oxcf.cpu_used <= 5)
|
||||
speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04);
|
||||
else
|
||||
speed_correction = 1.25;
|
||||
}
|
||||
// II ratio correction factor for clip as a whole
|
||||
clip_iiratio = cpi->total_stats->intra_error /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->total_stats->coded_error);
|
||||
clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
|
||||
if (clip_iifactor < 0.80)
|
||||
clip_iifactor = 0.80;
|
||||
|
||||
// Correction factor used for Q values >= 20
|
||||
corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq);
|
||||
corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high;
|
||||
|
||||
// Try and pick a Q that can encode the content at the given rate.
|
||||
for (Q = 0; Q < MAXQ; Q++)
|
||||
{
|
||||
int bits_per_mb_at_this_q;
|
||||
|
||||
if (Q < 50)
|
||||
{
|
||||
correction_factor =
|
||||
pow( err_per_mb / BASE_ERRPERMB, (pow_lowq + Q * 0.01));
|
||||
|
||||
correction_factor = (correction_factor < 0.05) ? 0.05
|
||||
: (correction_factor > 5.0) ? 5.0
|
||||
: correction_factor;
|
||||
}
|
||||
else
|
||||
correction_factor = corr_high;
|
||||
|
||||
bits_per_mb_at_this_q =
|
||||
(int)( .5 + correction_factor *
|
||||
speed_correction *
|
||||
clip_iifactor *
|
||||
(double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0);
|
||||
|
||||
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
|
||||
break;
|
||||
}
|
||||
|
||||
return cq_level[Q];
|
||||
}
|
||||
|
||||
extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);
|
||||
|
||||
void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
@@ -1145,6 +1254,14 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
cpi->output_frame_rate = cpi->oxcf.frame_rate;
|
||||
cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
|
||||
cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0);
|
||||
cpi->clip_bits_total = cpi->bits_left;
|
||||
|
||||
// Calculate a minimum intra value to be used in determining the IIratio
|
||||
// scores used in the second pass. We have this minimum to make sure
|
||||
// that clips that are static but "low complexity" in the intra domain
|
||||
// are still boosted appropriately for KF/GF/ARF
|
||||
cpi->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
|
||||
cpi->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
|
||||
|
||||
vp8_avg_stats(cpi->total_stats);
|
||||
|
||||
@@ -1173,17 +1290,25 @@ void vp8_init_second_pass(VP8_COMP *cpi)
|
||||
{
|
||||
start_pos = cpi->stats_in; // Note starting "file" position
|
||||
|
||||
cpi->modified_total_error_left = 0.0;
|
||||
cpi->modified_error_total = 0.0;
|
||||
cpi->modified_error_used = 0.0;
|
||||
|
||||
while (vp8_input_stats(cpi, &this_frame) != EOF)
|
||||
{
|
||||
cpi->modified_total_error_left += calculate_modified_err(cpi, &this_frame);
|
||||
cpi->modified_error_total += calculate_modified_err(cpi, &this_frame);
|
||||
}
|
||||
cpi->modified_error_left = cpi->modified_error_total;
|
||||
|
||||
reset_fpf_position(cpi, start_pos); // Reset file position
|
||||
|
||||
}
|
||||
|
||||
// Calculate the clip target modified bits per error
|
||||
// The observed bpe starts as the same number.
|
||||
cpi->clip_bpe = cpi->bits_left /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_total);
|
||||
cpi->observed_bpe = cpi->clip_bpe;
|
||||
|
||||
cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
|
||||
}
|
||||
|
||||
@@ -1250,7 +1375,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// what level of boost is appropriate for the GF or ARF that will be coded with the group
|
||||
i = 0;
|
||||
|
||||
while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
|
||||
while (((i < cpi->static_scene_max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
|
||||
{
|
||||
double r;
|
||||
double this_frame_mvr_ratio;
|
||||
@@ -1308,6 +1433,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// Underlying boost factor is based on inter intra error ratio
|
||||
r = (boost_factor * (next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)));
|
||||
|
||||
if (next_frame.intra_error > cpi->gf_intra_err_min)
|
||||
r = (IIKFACTOR2 * next_frame.intra_error /
|
||||
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
|
||||
else
|
||||
r = (IIKFACTOR2 * cpi->gf_intra_err_min /
|
||||
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
|
||||
|
||||
// Increase boost for frames where new data coming into frame (eg zoom out)
|
||||
// Slightly reduce boost if there is a net balance of motion out of the frame (zoom in)
|
||||
// The range for this_frame_mv_in_out is -1.0 to +1.0
|
||||
@@ -1353,18 +1485,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
boost_score += (decay_accumulator * r);
|
||||
|
||||
// Break out conditions.
|
||||
if ( /* i>4 || */
|
||||
if ( /* i>4 || */
|
||||
// Break at cpi->max_gf_interval unless almost totally static
|
||||
(i >= cpi->max_gf_interval && (loop_decay_rate < 0.99)) ||
|
||||
(
|
||||
(i > MIN_GF_INTERVAL) && // Dont break out with a very short interval
|
||||
((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) && // Dont break out very close to a key frame
|
||||
// Dont break out with a very short interval
|
||||
(i > MIN_GF_INTERVAL) &&
|
||||
// Dont break out very close to a key frame
|
||||
((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) &&
|
||||
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
|
||||
((mv_ratio_accumulator > 100.0) ||
|
||||
(abs_mv_in_out_accumulator > 3.0) ||
|
||||
(mv_in_out_accumulator < -2.0) ||
|
||||
((boost_score - old_boost_score) < 2.0)
|
||||
)
|
||||
)
|
||||
)
|
||||
((boost_score - old_boost_score) < 2.0))
|
||||
) )
|
||||
{
|
||||
boost_score = old_boost_score;
|
||||
break;
|
||||
@@ -1439,7 +1573,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
// Boost for arf frame
|
||||
Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
|
||||
Boost += (cpi->baseline_gf_interval * 50);
|
||||
Boost += (i * 50);
|
||||
allocation_chunks = (i * 100) + Boost;
|
||||
|
||||
// Normalize Altboost and allocation chunks down to prevent overflow
|
||||
@@ -1585,6 +1719,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// Reset the file position
|
||||
reset_fpf_position(cpi, start_pos);
|
||||
|
||||
// Update the record of error used so far (only done once per gf group)
|
||||
cpi->modified_error_used += gf_group_err;
|
||||
|
||||
// Assign bits to the arf or gf.
|
||||
{
|
||||
int Boost;
|
||||
@@ -1738,17 +1875,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
vp8_avg_stats(&sectionstats);
|
||||
|
||||
if (sectionstats.pcnt_motion < .17)
|
||||
cpi->section_is_low_motion = 1;
|
||||
else
|
||||
cpi->section_is_low_motion = 0;
|
||||
|
||||
if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
|
||||
cpi->section_is_fast_motion = 1;
|
||||
else
|
||||
cpi->section_is_fast_motion = 0;
|
||||
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
cpi->section_intra_rating =
|
||||
sectionstats.intra_error /
|
||||
DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
|
||||
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
//if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
|
||||
@@ -1892,6 +2021,16 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
// Is this a GF / ARF (Note that a KF is always also a GF)
|
||||
if (cpi->frames_till_gf_update_due == 0)
|
||||
{
|
||||
// Update monitor of the bits per error observed so far.
|
||||
// Done once per gf group based on what has gone before
|
||||
// so do nothing if this is the first frame.
|
||||
if (cpi->common.current_video_frame > 0)
|
||||
{
|
||||
cpi->observed_bpe =
|
||||
(double)(cpi->clip_bits_total - cpi->bits_left) /
|
||||
cpi->modified_error_used;
|
||||
}
|
||||
|
||||
// Define next gf group and assign bits to it
|
||||
vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
|
||||
define_gf_group(cpi, &this_frame_copy);
|
||||
@@ -1965,22 +2104,56 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
|
||||
if (cpi->common.current_video_frame == 0)
|
||||
{
|
||||
// guess at 2nd pass q
|
||||
cpi->est_max_qcorrection_factor = 1.0;
|
||||
tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left), cpi->common.Height, cpi->common.Width);
|
||||
|
||||
if (tmp_q < cpi->worst_quality)
|
||||
// Experimental code to try and set a cq_level in constrained
|
||||
// quality mode.
|
||||
if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY )
|
||||
{
|
||||
cpi->active_worst_quality = tmp_q;
|
||||
cpi->ni_av_qi = tmp_q;
|
||||
}
|
||||
else
|
||||
{
|
||||
cpi->active_worst_quality = cpi->worst_quality;
|
||||
cpi->ni_av_qi = cpi->worst_quality;
|
||||
int est_cq;
|
||||
|
||||
est_cq =
|
||||
estimate_cq( cpi,
|
||||
(cpi->total_coded_error_left / frames_left),
|
||||
(int)(cpi->bits_left / frames_left),
|
||||
cpi->common.Height, cpi->common.Width);
|
||||
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
if ( est_cq > cpi->cq_target_quality )
|
||||
cpi->cq_target_quality = est_cq;
|
||||
}
|
||||
|
||||
// guess at maxq needed in 2nd pass
|
||||
cpi->maxq_max_limit = cpi->worst_quality;
|
||||
cpi->maxq_min_limit = cpi->best_quality;
|
||||
tmp_q = estimate_max_q( cpi,
|
||||
(cpi->total_coded_error_left / frames_left),
|
||||
(int)(cpi->bits_left / frames_left),
|
||||
cpi->common.Height,
|
||||
cpi->common.Width);
|
||||
|
||||
// Limit the maxq value returned subsequently.
|
||||
// This increases the risk of overspend or underspend if the initial
|
||||
// estimate for the clip is bad, but helps prevent excessive
|
||||
// variation in Q, especially near the end of a clip
|
||||
// where for example a small overspend may cause Q to crash
|
||||
cpi->maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality)
|
||||
? (tmp_q + 32) : cpi->worst_quality;
|
||||
cpi->maxq_min_limit = ((tmp_q - 32) > cpi->best_quality)
|
||||
? (tmp_q - 32) : cpi->best_quality;
|
||||
|
||||
cpi->active_worst_quality = tmp_q;
|
||||
cpi->ni_av_qi = tmp_q;
|
||||
}
|
||||
else
|
||||
|
||||
// The last few frames of a clip almost always have too few or too many
|
||||
// bits and for the sake of over exact rate control we dont want to make
|
||||
// radical adjustments to the allowed quantizer range just to use up a
|
||||
// few surplus bits or get beneath the target rate.
|
||||
else if ( (cpi->common.current_video_frame <
|
||||
(((unsigned int)cpi->total_stats->count * 255)>>8)) &&
|
||||
((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
|
||||
(unsigned int)cpi->total_stats->count) )
|
||||
{
|
||||
if (frames_left < 1)
|
||||
frames_left = 1;
|
||||
@@ -1994,13 +2167,6 @@ void vp8_second_pass(VP8_COMP *cpi)
|
||||
cpi->active_worst_quality --;
|
||||
|
||||
cpi->active_worst_quality = ((cpi->active_worst_quality * 3) + tmp_q + 2) / 4;
|
||||
|
||||
// Clamp to user set limits
|
||||
if (cpi->active_worst_quality > cpi->worst_quality)
|
||||
cpi->active_worst_quality = cpi->worst_quality;
|
||||
else if (cpi->active_worst_quality < cpi->best_quality)
|
||||
cpi->active_worst_quality = cpi->best_quality;
|
||||
|
||||
}
|
||||
|
||||
cpi->frames_to_key --;
|
||||
@@ -2122,6 +2288,9 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
cpi->common.frame_type = KEY_FRAME;
|
||||
|
||||
// is this a forced key frame by interval
|
||||
cpi->this_key_frame_forced = cpi->next_key_frame_forced;
|
||||
|
||||
// Clear the alt ref active flag as this can never be active on a key frame
|
||||
cpi->source_alt_ref_active = FALSE;
|
||||
|
||||
@@ -2184,7 +2353,11 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
kf_group_err /= 2.0;
|
||||
kf_group_intra_err /= 2.0;
|
||||
kf_group_coded_err /= 2.0;
|
||||
|
||||
cpi->next_key_frame_forced = TRUE;
|
||||
}
|
||||
else
|
||||
cpi->next_key_frame_forced = FALSE;
|
||||
|
||||
// Special case for the last frame of the file
|
||||
if (cpi->stats_in >= cpi->stats_in_end)
|
||||
@@ -2199,7 +2372,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
}
|
||||
|
||||
// Calculate the number of bits that should be assigned to the kf group.
|
||||
if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0))
|
||||
if ((cpi->bits_left > 0) && ((int)cpi->modified_error_left > 0))
|
||||
{
|
||||
// Max for a single normal frame (not key frame)
|
||||
int max_bits = frame_max_bits(cpi);
|
||||
@@ -2211,7 +2384,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
// complexity of the section
|
||||
cpi->kf_group_bits = (long long)( cpi->bits_left *
|
||||
( kf_group_err /
|
||||
cpi->modified_total_error_left ));
|
||||
cpi->modified_error_left ));
|
||||
|
||||
// Clip based on maximum per frame rate defined by the user.
|
||||
max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key;
|
||||
@@ -2283,7 +2456,12 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
if (EOF == vp8_input_stats(cpi, &next_frame))
|
||||
break;
|
||||
|
||||
r = (IIKFACTOR2 * next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)) ;
|
||||
if (next_frame.intra_error > cpi->kf_intra_err_min)
|
||||
r = (IIKFACTOR2 * next_frame.intra_error /
|
||||
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
|
||||
else
|
||||
r = (IIKFACTOR2 * cpi->kf_intra_err_min /
|
||||
DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
|
||||
|
||||
if (r > RMAX)
|
||||
r = RMAX;
|
||||
@@ -2344,17 +2522,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
vp8_avg_stats(&sectionstats);
|
||||
|
||||
if (sectionstats.pcnt_motion < .17)
|
||||
cpi->section_is_low_motion = 1;
|
||||
else
|
||||
cpi->section_is_low_motion = 0;
|
||||
|
||||
if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
|
||||
cpi->section_is_fast_motion = 1;
|
||||
else
|
||||
cpi->section_is_fast_motion = 0;
|
||||
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
|
||||
Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
|
||||
// if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
|
||||
@@ -2434,7 +2602,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100
|
||||
|
||||
// Adjustment to boost based on recent average q
|
||||
kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100;
|
||||
//kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100;
|
||||
|
||||
if (kf_boost < 250) // Min KF boost
|
||||
kf_boost = 250;
|
||||
@@ -2474,7 +2642,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
double alt_kf_grp_bits =
|
||||
((double)cpi->bits_left *
|
||||
(kf_mod_err * (double)cpi->frames_to_key) /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left));
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_left));
|
||||
|
||||
alt_kf_bits = (int)((double)kf_boost *
|
||||
(alt_kf_grp_bits / (double)allocation_chunks));
|
||||
@@ -2492,7 +2660,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
alt_kf_bits =
|
||||
(int)((double)cpi->bits_left *
|
||||
(kf_mod_err /
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)));
|
||||
DOUBLE_DIVIDE_CHECK(cpi->modified_error_left)));
|
||||
|
||||
if (alt_kf_bits > cpi->kf_bits)
|
||||
{
|
||||
@@ -2512,7 +2680,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
||||
|
||||
// Adjust the count of total modified error left.
|
||||
// The count of bits left is adjusted elsewhere based on real coded frame sizes
|
||||
cpi->modified_total_error_left -= kf_group_err;
|
||||
cpi->modified_error_left -= kf_group_err;
|
||||
|
||||
if (cpi->oxcf.allow_spatial_resampling)
|
||||
{
|
||||
|
@@ -40,6 +40,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
|
||||
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
|
||||
|
||||
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
|
||||
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
|
||||
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
|
||||
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
|
||||
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
|
||||
|
||||
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
|
||||
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
|
||||
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
|
||||
@@ -85,9 +91,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
|
||||
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
cpi->rtcd.search.full_search = vp8_full_search_sad;
|
||||
#endif
|
||||
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
|
||||
|
||||
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
|
||||
#endif
|
||||
|
||||
// Pure C:
|
||||
|
@@ -408,6 +408,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
|
||||
diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
|
||||
break;
|
||||
case 3:
|
||||
default:
|
||||
this_mv.col += 4;
|
||||
this_mv.row += 4;
|
||||
diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
|
||||
@@ -913,7 +914,8 @@ int vp8_diamond_search_sad
|
||||
int *num00,
|
||||
vp8_variance_fn_ptr_t *fn_ptr,
|
||||
int *mvsadcost[2],
|
||||
int *mvcost[2]
|
||||
int *mvcost[2],
|
||||
MV *center_mv
|
||||
)
|
||||
{
|
||||
int i, j, step;
|
||||
@@ -940,6 +942,8 @@ int vp8_diamond_search_sad
|
||||
unsigned char *check_here;
|
||||
int thissad;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
// Work out the start point for the search
|
||||
in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
|
||||
best_address = in_what;
|
||||
@@ -949,7 +953,7 @@ int vp8_diamond_search_sad
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Check the starting position
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// search_param determines the length of the initial step and hence the number of iterations
|
||||
@@ -961,8 +965,6 @@ int vp8_diamond_search_sad
|
||||
best_mv->row = ref_row;
|
||||
best_mv->col = ref_col;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
for (step = 0; step < tot_steps ; step++)
|
||||
{
|
||||
for (j = 0 ; j < x->searches_per_step ; j++)
|
||||
@@ -982,7 +984,7 @@ int vp8_diamond_search_sad
|
||||
{
|
||||
this_mv.row = this_row_offset << 3;
|
||||
this_mv.col = this_col_offset << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1013,7 +1015,7 @@ int vp8_diamond_search_sad
|
||||
return INT_MAX;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
}
|
||||
|
||||
int vp8_diamond_search_sadx4
|
||||
@@ -1028,7 +1030,8 @@ int vp8_diamond_search_sadx4
|
||||
int *num00,
|
||||
vp8_variance_fn_ptr_t *fn_ptr,
|
||||
int *mvsadcost[2],
|
||||
int *mvcost[2]
|
||||
int *mvcost[2],
|
||||
MV *center_mv
|
||||
)
|
||||
{
|
||||
int i, j, step;
|
||||
@@ -1055,6 +1058,8 @@ int vp8_diamond_search_sadx4
|
||||
unsigned char *check_here;
|
||||
unsigned int thissad;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
// Work out the start point for the search
|
||||
in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
|
||||
best_address = in_what;
|
||||
@@ -1064,7 +1069,7 @@ int vp8_diamond_search_sadx4
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Check the starting position
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// search_param determines the length of the initial step and hence the number of iterations
|
||||
@@ -1076,8 +1081,6 @@ int vp8_diamond_search_sadx4
|
||||
best_mv->row = ref_row;
|
||||
best_mv->col = ref_col;
|
||||
|
||||
*num00 = 0;
|
||||
|
||||
for (step = 0; step < tot_steps ; step++)
|
||||
{
|
||||
int all_in = 1, t;
|
||||
@@ -1108,7 +1111,7 @@ int vp8_diamond_search_sadx4
|
||||
{
|
||||
this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
|
||||
this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
|
||||
sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (sad_array[t] < bestsad)
|
||||
{
|
||||
@@ -1137,7 +1140,7 @@ int vp8_diamond_search_sadx4
|
||||
{
|
||||
this_mv.row = this_row_offset << 3;
|
||||
this_mv.col = this_col_offset << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1168,12 +1171,12 @@ int vp8_diamond_search_sadx4
|
||||
return INT_MAX;
|
||||
|
||||
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
}
|
||||
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
|
||||
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
|
||||
{
|
||||
unsigned char *what = (*(b->base_src) + b->src);
|
||||
int what_stride = b->src_stride;
|
||||
@@ -1211,7 +1214,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
// Baseline value at the centre
|
||||
|
||||
//bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1239,7 +1242,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
this_mv.col = c << 3;
|
||||
//thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
|
||||
//thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1258,12 +1261,12 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
|
||||
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
|
||||
{
|
||||
unsigned char *what = (*(b->base_src) + b->src);
|
||||
int what_stride = b->src_stride;
|
||||
@@ -1301,7 +1304,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Baseline value at the centre
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
@@ -1323,7 +1326,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
check_here = r * mv_stride + in_what + col_min;
|
||||
c = col_min;
|
||||
|
||||
while ((c + 3) < col_max)
|
||||
while ((c + 2) < col_max)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1336,7 +1339,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1359,7 +1362,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
@@ -1381,12 +1384,163 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
#endif
|
||||
|
||||
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
|
||||
{
|
||||
unsigned char *what = (*(b->base_src) + b->src);
|
||||
int what_stride = b->src_stride;
|
||||
unsigned char *in_what;
|
||||
int in_what_stride = d->pre_stride;
|
||||
int mv_stride = d->pre_stride;
|
||||
unsigned char *bestaddress;
|
||||
MV *best_mv = &d->bmi.mv.as_mv;
|
||||
MV this_mv;
|
||||
int bestsad = INT_MAX;
|
||||
int r, c;
|
||||
|
||||
unsigned char *check_here;
|
||||
unsigned int thissad;
|
||||
|
||||
int ref_row = ref_mv->row >> 3;
|
||||
int ref_col = ref_mv->col >> 3;
|
||||
|
||||
int row_min = ref_row - distance;
|
||||
int row_max = ref_row + distance;
|
||||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
unsigned short sad_array8[8];
|
||||
unsigned int sad_array[3];
|
||||
|
||||
// Work out the mid point for the search
|
||||
in_what = *(d->base_pre) + d->pre;
|
||||
bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
|
||||
|
||||
best_mv->row = ref_row;
|
||||
best_mv->col = ref_col;
|
||||
|
||||
// We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
|
||||
if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
|
||||
(ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
|
||||
{
|
||||
// Baseline value at the centre
|
||||
bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
|
||||
}
|
||||
|
||||
// Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
|
||||
if (col_min < x->mv_col_min)
|
||||
col_min = x->mv_col_min;
|
||||
|
||||
if (col_max > x->mv_col_max)
|
||||
col_max = x->mv_col_max;
|
||||
|
||||
if (row_min < x->mv_row_min)
|
||||
row_min = x->mv_row_min;
|
||||
|
||||
if (row_max > x->mv_row_max)
|
||||
row_max = x->mv_row_max;
|
||||
|
||||
for (r = row_min; r < row_max ; r++)
|
||||
{
|
||||
this_mv.row = r << 3;
|
||||
check_here = r * mv_stride + in_what + col_min;
|
||||
c = col_min;
|
||||
|
||||
while ((c + 7) < col_max)
|
||||
{
|
||||
int i;
|
||||
|
||||
fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
thissad = (unsigned int)sad_array8[i];
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
bestsad = thissad;
|
||||
best_mv->row = r;
|
||||
best_mv->col = c;
|
||||
bestaddress = check_here;
|
||||
}
|
||||
}
|
||||
|
||||
check_here++;
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
while ((c + 2) < col_max)
|
||||
{
|
||||
int i;
|
||||
|
||||
fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
thissad = sad_array[i];
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
bestsad = thissad;
|
||||
best_mv->row = r;
|
||||
best_mv->col = c;
|
||||
bestaddress = check_here;
|
||||
}
|
||||
}
|
||||
|
||||
check_here++;
|
||||
c++;
|
||||
}
|
||||
}
|
||||
|
||||
while (c < col_max)
|
||||
{
|
||||
thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
this_mv.col = c << 3;
|
||||
thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
|
||||
|
||||
if (thissad < bestsad)
|
||||
{
|
||||
bestsad = thissad;
|
||||
best_mv->row = r;
|
||||
best_mv->col = c;
|
||||
bestaddress = check_here;
|
||||
}
|
||||
}
|
||||
|
||||
check_here ++;
|
||||
c ++;
|
||||
}
|
||||
}
|
||||
|
||||
this_mv.row = best_mv->row << 3;
|
||||
this_mv.col = best_mv->col << 3;
|
||||
|
||||
if (bestsad < INT_MAX)
|
||||
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
|
||||
+ vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
|
||||
else
|
||||
return INT_MAX;
|
||||
}
|
||||
#endif /* !(CONFIG_REALTIME_ONLY) */
|
||||
|
||||
#ifdef ENTROPY_STATS
|
||||
void print_mode_context(void)
|
||||
|
@@ -25,7 +25,6 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
|
||||
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8) // Max full pel mv specified in 1/8 pel units
|
||||
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
|
||||
|
||||
|
||||
extern void print_mode_context(void);
|
||||
extern int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight);
|
||||
extern void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride);
|
||||
@@ -67,7 +66,8 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
|
||||
int distance, \
|
||||
vp8_variance_fn_ptr_t *fn_ptr, \
|
||||
int *mvcost[2], \
|
||||
int *mvsadcost[2] \
|
||||
int *mvsadcost[2], \
|
||||
MV *center_mv \
|
||||
)
|
||||
|
||||
#define prototype_diamond_search_sad(sym)\
|
||||
@@ -83,7 +83,8 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
|
||||
int *num00, \
|
||||
vp8_variance_fn_ptr_t *fn_ptr, \
|
||||
int *mvsadcost[2], \
|
||||
int *mvcost[2] \
|
||||
int *mvcost[2], \
|
||||
MV *center_mv \
|
||||
)
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
@@ -93,6 +94,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
|
||||
typedef prototype_full_search_sad(*vp8_full_search_fn_t);
|
||||
extern prototype_full_search_sad(vp8_full_search_sad);
|
||||
extern prototype_full_search_sad(vp8_full_search_sadx3);
|
||||
extern prototype_full_search_sad(vp8_full_search_sadx8);
|
||||
|
||||
typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t);
|
||||
extern prototype_diamond_search_sad(vp8_diamond_search_sad);
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,6 @@
|
||||
#include "treewriter.h"
|
||||
#include "tokenize.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "preproc.h"
|
||||
#include "variance.h"
|
||||
#include "dct.h"
|
||||
#include "encodemb.h"
|
||||
@@ -28,6 +27,7 @@
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "mcomp.h"
|
||||
#include "temporal_filter.h"
|
||||
|
||||
//#define SPEEDSTATS 1
|
||||
#define MIN_GF_INTERVAL 4
|
||||
@@ -46,6 +46,8 @@
|
||||
#define MAX_THRESHMULT 512
|
||||
|
||||
#define GF_ZEROMV_ZBIN_BOOST 24
|
||||
#define LF_ZEROMV_ZBIN_BOOST 12
|
||||
#define MV_ZBIN_BOOST 4
|
||||
#define ZBIN_OQ_MAX 192
|
||||
|
||||
#define VP8_TEMPORAL_ALT_REF 1
|
||||
@@ -180,6 +182,9 @@ typedef struct
|
||||
int first_step;
|
||||
int optimize_coefficients;
|
||||
|
||||
int use_fastquant_for_pick;
|
||||
int no_skip_block4x4_search;
|
||||
|
||||
} SPEED_FEATURES;
|
||||
|
||||
typedef struct
|
||||
@@ -227,6 +232,7 @@ typedef struct VP8_ENCODER_RTCD
|
||||
vp8_encodemb_rtcd_vtable_t encodemb;
|
||||
vp8_quantize_rtcd_vtable_t quantize;
|
||||
vp8_search_rtcd_vtable_t search;
|
||||
vp8_temporal_rtcd_vtable_t temporal;
|
||||
} VP8_ENCODER_RTCD;
|
||||
|
||||
enum
|
||||
@@ -239,6 +245,12 @@ enum
|
||||
BLOCK_MAX_SEGMENTS
|
||||
};
|
||||
|
||||
typedef union
|
||||
{
|
||||
unsigned int as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
@@ -260,6 +272,9 @@ typedef struct
|
||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]);
|
||||
|
||||
|
||||
MACROBLOCK mb;
|
||||
@@ -276,14 +291,14 @@ typedef struct
|
||||
unsigned int source_frame_flags;
|
||||
YV12_BUFFER_CONFIG scaled_source;
|
||||
|
||||
int source_buffer_count;
|
||||
int source_encode_index;
|
||||
int source_alt_ref_pending;
|
||||
int source_alt_ref_active;
|
||||
int source_buffer_count; // number of src_buffers in use for lagged encoding
|
||||
int source_encode_index; // index of buffer in src_buffer to encode
|
||||
int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
|
||||
int source_alt_ref_active; // an alt ref frame has been encoded and is usable
|
||||
|
||||
int last_alt_ref_sei;
|
||||
int is_src_frame_alt_ref;
|
||||
int is_next_src_alt_ref;
|
||||
int last_alt_ref_sei; // index into src_buffers of frame used as alt reference
|
||||
int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
|
||||
int is_next_src_alt_ref; // source of next frame to encode is an exact copy of an alt ref frame
|
||||
|
||||
int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
|
||||
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
|
||||
@@ -302,7 +317,11 @@ typedef struct
|
||||
|
||||
unsigned int frames_since_key;
|
||||
unsigned int key_frame_frequency;
|
||||
unsigned int next_key;
|
||||
unsigned int this_key_frame_forced;
|
||||
unsigned int next_key_frame_forced;
|
||||
|
||||
// Ambient reconstruction err target for force key frames
|
||||
int ambient_err;
|
||||
|
||||
unsigned int mode_check_freq[MAX_MODES];
|
||||
unsigned int mode_test_hit_counts[MAX_MODES];
|
||||
@@ -319,6 +338,7 @@ typedef struct
|
||||
int mvcostmultiplier;
|
||||
int subseqblockweight;
|
||||
int errthresh;
|
||||
unsigned int activity_avg;
|
||||
|
||||
int RDMULT;
|
||||
int RDDIV ;
|
||||
@@ -350,7 +370,6 @@ typedef struct
|
||||
int this_frame_target;
|
||||
int projected_frame_size;
|
||||
int last_q[2]; // Separate values for Intra/Inter
|
||||
int target_bits_per_mb;
|
||||
|
||||
double rate_correction_factor;
|
||||
double key_frame_rate_correction_factor;
|
||||
@@ -383,6 +402,7 @@ typedef struct
|
||||
int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames
|
||||
int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame.
|
||||
int max_gf_interval;
|
||||
int static_scene_max_gf_interval;
|
||||
int baseline_gf_interval;
|
||||
int gf_decay_rate;
|
||||
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
|
||||
@@ -399,6 +419,7 @@ typedef struct
|
||||
int inter_frame_target;
|
||||
double output_frame_rate;
|
||||
long long last_time_stamp_seen;
|
||||
long long last_end_time_stamp_seen;
|
||||
long long first_time_stamp_ever;
|
||||
|
||||
int ni_av_qi;
|
||||
@@ -431,6 +452,10 @@ typedef struct
|
||||
int best_quality;
|
||||
int active_best_quality;
|
||||
|
||||
int cq_target_quality;
|
||||
int maxq_max_limit;
|
||||
int maxq_min_limit;
|
||||
|
||||
int drop_frames_allowed; // Are we permitted to drop frames?
|
||||
int drop_frame; // Drop this frame?
|
||||
int drop_count; // How many frames have we dropped?
|
||||
@@ -454,8 +479,6 @@ typedef struct
|
||||
unsigned char *output_partition2;
|
||||
size_t output_partition2size;
|
||||
|
||||
pre_proc_instance ppi;
|
||||
|
||||
int frames_to_key;
|
||||
int gfu_boost;
|
||||
int kf_boost;
|
||||
@@ -465,12 +488,20 @@ typedef struct
|
||||
double total_coded_error_left;
|
||||
double start_tot_err_left;
|
||||
double min_error;
|
||||
double kf_intra_err_min;
|
||||
double gf_intra_err_min;
|
||||
|
||||
double modified_error_total;
|
||||
double modified_error_used;
|
||||
double modified_error_left;
|
||||
double clip_bpe;
|
||||
double observed_bpe;
|
||||
|
||||
double modified_total_error_left;
|
||||
double avg_iiratio;
|
||||
|
||||
int target_bandwidth;
|
||||
long long bits_left;
|
||||
long long clip_bits_total;
|
||||
FIRSTPASS_STATS *total_stats;
|
||||
FIRSTPASS_STATS *this_frame_stats;
|
||||
FIRSTPASS_STATS *stats_in, *stats_in_end;
|
||||
@@ -611,9 +642,6 @@ typedef struct
|
||||
unsigned int tempdata2;
|
||||
|
||||
int base_skip_false_prob[128];
|
||||
unsigned int section_is_low_motion;
|
||||
unsigned int section_benefits_from_aggresive_q;
|
||||
unsigned int section_is_fast_motion;
|
||||
unsigned int section_intra_rating;
|
||||
|
||||
double section_max_qfactor;
|
||||
@@ -661,6 +689,10 @@ typedef struct
|
||||
unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
|
||||
int gf_active_count;
|
||||
|
||||
//Store last frame's MV info for next frame MV prediction
|
||||
int_mv *lfmv;
|
||||
int *lf_ref_frame_sign_bias;
|
||||
int *lf_ref_frame;
|
||||
|
||||
} VP8_COMP;
|
||||
|
||||
@@ -670,6 +702,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
|
||||
|
||||
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
|
||||
|
||||
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
|
||||
|
||||
int rd_cost_intra_mb(MACROBLOCKD *x);
|
||||
|
||||
void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);
|
||||
|
@@ -608,8 +608,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
|
||||
}
|
||||
|
||||
//Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
|
||||
if (cpi->is_src_frame_alt_ref)
|
||||
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
|
||||
// unless ARNR filtering is enabled in which case we want
|
||||
// an unfiltered alternative
|
||||
if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
|
||||
{
|
||||
if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
|
||||
continue;
|
||||
@@ -685,7 +687,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
#if 0
|
||||
|
||||
// Initial step Search
|
||||
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost);
|
||||
bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost, &best_ref_mv1);
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
@@ -698,7 +700,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost);
|
||||
thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost, &best_ref_mv1);
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
@@ -724,7 +726,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
}
|
||||
else
|
||||
{
|
||||
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
|
||||
bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb < 9
|
||||
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
|
||||
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
|
||||
|
||||
@@ -743,7 +745,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
|
||||
num00--;
|
||||
else
|
||||
{
|
||||
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
|
||||
thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv1); //sadpb = 9
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
|
@@ -1,251 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc.c
|
||||
*
|
||||
* Description : Simple pre-processor.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
|
||||
#include "memory.h"
|
||||
#include "preproc7.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Macros
|
||||
****************************************************************************/
|
||||
#define FRAMECOUNT 7
|
||||
#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) )
|
||||
|
||||
/****************************************************************************
|
||||
* Imports
|
||||
****************************************************************************/
|
||||
extern void vp8_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
****************************************************************************/
|
||||
void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
void temp_filter_mmx
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
);
|
||||
void temp_filter_wmt
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
);
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : temp_filter_c
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* unsigned char *s : Pointer to source frame.
|
||||
* unsigned char *d : Pointer to destination frame.
|
||||
* int bytes : Number of bytes to filter.
|
||||
* int strength : Strength of filter to apply.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs a closesness adjusted temporarl blur
|
||||
*
|
||||
* SPECIAL NOTES : Destination frame can be same as source frame.
|
||||
*
|
||||
****************************************************************************/
|
||||
void temp_filter_c
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
)
|
||||
{
|
||||
int byte = 0;
|
||||
unsigned char *frameptr = ppi->frame_buffer;
|
||||
|
||||
if (ppi->frame == 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
int frame = 0;
|
||||
|
||||
do
|
||||
{
|
||||
*frameptr = s[byte];
|
||||
++frameptr;
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
d[byte] = s[byte];
|
||||
|
||||
++byte;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
int modifier;
|
||||
int offset = (ppi->frame % FRAMECOUNT);
|
||||
|
||||
do
|
||||
{
|
||||
int accumulator = 0;
|
||||
int count = 0;
|
||||
int frame = 0;
|
||||
|
||||
frameptr[offset] = s[byte];
|
||||
|
||||
do
|
||||
{
|
||||
int pixel_value = *frameptr;
|
||||
|
||||
modifier = s[byte];
|
||||
modifier -= pixel_value;
|
||||
modifier *= modifier;
|
||||
modifier >>= strength;
|
||||
modifier *= 3;
|
||||
|
||||
if (modifier > 16)
|
||||
modifier = 16;
|
||||
|
||||
modifier = 16 - modifier;
|
||||
|
||||
accumulator += modifier * pixel_value;
|
||||
|
||||
count += modifier;
|
||||
|
||||
frameptr++;
|
||||
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
accumulator += (count >> 1);
|
||||
accumulator *= ppi->fixed_divide[count];
|
||||
accumulator >>= 16;
|
||||
|
||||
d[byte] = accumulator;
|
||||
|
||||
++byte;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
|
||||
++ppi->frame;
|
||||
}
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : delete_pre_proc
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Deletes a pre-processing instance.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
// Releases both allocations owned by the instance and clears the raw and
// derived pointers so the instance can be safely re-initialised or deleted again.
void delete_pre_proc(pre_proc_instance *ppi)
{
    // Pixel history buffer.
    if (ppi->frame_buffer_alloc != 0)
    {
        vpx_free(ppi->frame_buffer_alloc);
    }

    ppi->frame_buffer       = 0;
    ppi->frame_buffer_alloc = 0;

    // Reciprocal lookup table.
    if (ppi->fixed_divide_alloc != 0)
    {
        vpx_free(ppi->fixed_divide_alloc);
    }

    ppi->fixed_divide       = 0;
    ppi->fixed_divide_alloc = 0;
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : init_pre_proc7
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* int frame_size : Number of bytes in one frame.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : int: 1 if successful, 0 if failed.
|
||||
*
|
||||
* FUNCTION : Initializes pre-processor instance.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
// Selects the temporal filter implementation and (re)allocates the buffers
// of a pre-processor instance.
//
// ppi        : instance to initialise; any buffers it already owns are freed.
// frame_size : number of bytes in one frame; negative values are rejected.
//
// Returns 1 on success, 0 on failure (invalid size or allocation failure).
// On failure the instance is left with all buffer pointers cleared.
//
// NOTE(review): ppi->frame is not reset here, yet temp_filter_c() only seeds
// the history buffer when ppi->frame == 0 - confirm the caller zeroes it.
int init_pre_proc7(pre_proc_instance *ppi, int frame_size)
{
    int i;
    int mmx_enabled;
    int xmm_enabled;
    int wmt_enabled;

    vp8_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);

    // Prefer the wmt variant, then mmx, and fall back to plain C.
    if (wmt_enabled)
        temp_filter = temp_filter_wmt;
    else if (mmx_enabled)
        temp_filter = temp_filter_mmx;
    else
        temp_filter = temp_filter_c;

    delete_pre_proc(ppi);

    // A negative size would wrap in the unsigned size computation below and
    // could even cancel the padding, yielding a bogus "successful" allocation.
    if (frame_size < 0)
        return 0;

    // FRAMECOUNT history samples per pixel. Leading with sizeof keeps the
    // multiplication in the unsigned size type instead of signed int.
    // The extra 32 bytes presumably leave room for ROUNDUP32 to align the
    // pointer - TODO confirm against the macro definition.
    ppi->frame_buffer_alloc = vpx_malloc(32 + sizeof(unsigned char) * frame_size * FRAMECOUNT);

    if (!ppi->frame_buffer_alloc)
    {
        delete_pre_proc(ppi);
        return 0;
    }

    ppi->frame_buffer = (unsigned char *) ROUNDUP32(ppi->frame_buffer_alloc);

    ppi->fixed_divide_alloc = vpx_malloc(32 + 255 * sizeof(unsigned int));

    if (!ppi->fixed_divide_alloc)
    {
        delete_pre_proc(ppi);
        return 0;
    }

    ppi->fixed_divide = (unsigned int *) ROUNDUP32(ppi->fixed_divide_alloc);

    // fixed_divide[i] = 0x10000 / i lets the filter replace a division by a
    // multiply and a 16-bit shift. Entry 0 has no reciprocal; give it a
    // defined value rather than leaving it uninitialised.
    ppi->fixed_divide[0] = 0;

    for (i = 1; i < 255; i++)
        ppi->fixed_divide[i] = 0x10000 / i;

    return 1;
}
|
@@ -16,8 +16,9 @@
|
||||
#include "entropy.h"
|
||||
#include "predictdc.h"
|
||||
|
||||
//#define EXACT_QUANT
|
||||
#ifdef EXACT_QUANT
|
||||
#define EXACT_QUANT
|
||||
|
||||
#ifdef EXACT_FASTQUANT
|
||||
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
int i, rc, eob;
|
||||
@@ -26,7 +27,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *zbin_ptr = b->zbin;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *quant_shift_ptr = b->quant_shift;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
@@ -64,6 +65,44 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
d->eob = eob + 1;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Fast quantizer for one block of 16 coefficients.
// Reads b->coeff, b->round and b->quant_fast; writes d->qcoeff, d->dqcoeff
// and d->eob (index of the last non-zero coefficient in zig-zag order, plus one).
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
{
    short *coeff_ptr   = b->coeff;
    short *round_ptr   = b->round;
    short *quant_ptr   = b->quant_fast;
    short *qcoeff_ptr  = d->qcoeff;
    short *dqcoeff_ptr = d->dqcoeff;
    short *dequant_ptr = d->dequant;
    int last_nonzero = -1;
    int pos;

    for (pos = 0; pos < 16; pos++)
    {
        const int rc        = vp8_default_zig_zag1d[pos];
        const int z         = coeff_ptr[rc];
        const int sign      = (z >> 31);            // 0 for z >= 0, -1 for z < 0
        const int magnitude = (z ^ sign) - sign;    // abs(z)

        // Quantize the magnitude, then put the sign back.
        const int level        = ((magnitude + round_ptr[rc]) * quant_ptr[rc]) >> 16;
        const int signed_level = (level ^ sign) - sign;

        qcoeff_ptr[rc]  = signed_level;                    // quantized value
        dqcoeff_ptr[rc] = signed_level * dequant_ptr[rc];  // dequantized value

        if (level)
            last_nonzero = pos;
    }

    d->eob = last_nonzero + 1;
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef EXACT_QUANT
|
||||
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
int i, rc, eob;
|
||||
@@ -178,39 +217,6 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
}
|
||||
|
||||
#else
|
||||
void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
int i, rc, eob;
|
||||
int zbin;
|
||||
int x, y, z, sz;
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
|
||||
eob = -1;
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
rc = vp8_default_zig_zag1d[i];
|
||||
z = coeff_ptr[rc];
|
||||
|
||||
sz = (z >> 31); // sign of z
|
||||
x = (z ^ sz) - sz; // x = abs(z)
|
||||
|
||||
y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
|
||||
x = (y ^ sz) - sz; // get the sign back
|
||||
qcoeff_ptr[rc] = x; // write to destination
|
||||
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
|
||||
|
||||
if (y)
|
||||
{
|
||||
eob = i; // last nonzero coeffs
|
||||
}
|
||||
}
|
||||
d->eob = eob + 1;
|
||||
}
|
||||
|
||||
void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
|
@@ -45,46 +45,48 @@ extern int inter_b_modes[10];
|
||||
// Bits Per MB at different Q (Multiplied by 512)
|
||||
#define BPER_MB_NORMBITS 9
|
||||
|
||||
// Work in progress recalibration of baseline rate tables based on
|
||||
// the assumption that bits per mb is inversely proportional to the
|
||||
// quantizer value.
|
||||
const int vp8_bits_per_mb[2][QINDEX_RANGE] =
|
||||
{
|
||||
// (Updated 19 March 08) Baseline estimate of INTRA-frame Bits Per MB at each Q:
|
||||
// Intra case 450000/Qintra
|
||||
{
|
||||
674781, 606845, 553905, 524293, 500428, 452540, 435379, 414719,
|
||||
390970, 371082, 359416, 341807, 336957, 317263, 303724, 298402,
|
||||
285688, 275237, 268455, 262560, 256038, 248734, 241087, 237615,
|
||||
229247, 225211, 219112, 213920, 211559, 202714, 198482, 193401,
|
||||
187866, 183453, 179212, 175965, 171852, 167235, 163972, 160560,
|
||||
156032, 154349, 151390, 148725, 145708, 142311, 139981, 137700,
|
||||
134084, 131863, 129746, 128498, 126077, 123461, 121290, 117782,
|
||||
114883, 112332, 108410, 105685, 103434, 101192, 98587, 95959,
|
||||
94059, 92017, 89970, 87936, 86142, 84801, 82736, 81106,
|
||||
79668, 78135, 76641, 75103, 73943, 72693, 71401, 70098,
|
||||
69165, 67901, 67170, 65987, 64923, 63534, 62378, 61302,
|
||||
59921, 58941, 57844, 56782, 55960, 54973, 54257, 53454,
|
||||
52230, 50938, 49962, 49190, 48288, 47270, 46738, 46037,
|
||||
45020, 44027, 43216, 42287, 41594, 40702, 40081, 39414,
|
||||
38282, 37627, 36987, 36375, 35808, 35236, 34710, 34162,
|
||||
33659, 33327, 32751, 32384, 31936, 31461, 30982, 30582,
|
||||
1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000,
|
||||
409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705,
|
||||
250000, 236842, 225000, 225000, 214285, 214285, 204545, 204545,
|
||||
195652, 195652, 187500, 180000, 180000, 173076, 166666, 160714,
|
||||
155172, 150000, 145161, 140625, 136363, 132352, 128571, 125000,
|
||||
121621, 121621, 118421, 115384, 112500, 109756, 107142, 104651,
|
||||
102272, 100000, 97826, 97826, 95744, 93750, 91836, 90000,
|
||||
88235, 86538, 84905, 83333, 81818, 80357, 78947, 77586,
|
||||
76271, 75000, 73770, 72580, 71428, 70312, 69230, 68181,
|
||||
67164, 66176, 65217, 64285, 63380, 62500, 61643, 60810,
|
||||
60000, 59210, 59210, 58441, 57692, 56962, 56250, 55555,
|
||||
54878, 54216, 53571, 52941, 52325, 51724, 51136, 50561,
|
||||
49450, 48387, 47368, 46875, 45918, 45000, 44554, 44117,
|
||||
43269, 42452, 41666, 40909, 40178, 39473, 38793, 38135,
|
||||
36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088,
|
||||
32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662,
|
||||
},
|
||||
|
||||
// (Updated 19 March 08) Baseline estimate of INTER-frame Bits Per MB at each Q:
|
||||
// Inter case 285000/Qinter
|
||||
{
|
||||
497401, 426316, 372064, 352732, 335763, 283921, 273848, 253321,
|
||||
233181, 217727, 210030, 196685, 194836, 178396, 167753, 164116,
|
||||
154119, 146929, 142254, 138488, 133591, 127741, 123166, 120226,
|
||||
114188, 111756, 107882, 104749, 102522, 96451, 94424, 90905,
|
||||
87286, 84931, 82111, 80534, 77610, 74700, 73037, 70715,
|
||||
68006, 67235, 65374, 64009, 62134, 60180, 59105, 57691,
|
||||
55509, 54512, 53318, 52693, 51194, 49840, 48944, 46980,
|
||||
45668, 44177, 42348, 40994, 39859, 38889, 37717, 36391,
|
||||
35482, 34622, 33795, 32756, 32002, 31492, 30573, 29737,
|
||||
29152, 28514, 27941, 27356, 26859, 26329, 25874, 25364,
|
||||
24957, 24510, 24290, 23689, 23380, 22845, 22481, 22066,
|
||||
21587, 21219, 20880, 20452, 20260, 19926, 19661, 19334,
|
||||
18915, 18391, 18046, 17833, 17441, 17105, 16888, 16729,
|
||||
16383, 16023, 15706, 15442, 15222, 14938, 14673, 14452,
|
||||
14005, 13807, 13611, 13447, 13223, 13102, 12963, 12801,
|
||||
12627, 12534, 12356, 12228, 12056, 11907, 11746, 11643,
|
||||
712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090,
|
||||
237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000,
|
||||
142500, 135714, 129545, 123913, 118750, 114000, 109615, 105555,
|
||||
101785, 98275, 95000, 91935, 89062, 86363, 83823, 81428,
|
||||
79166, 77027, 75000, 73076, 71250, 69512, 67857, 66279,
|
||||
64772, 63333, 61956, 60638, 59375, 58163, 57000, 55882,
|
||||
54807, 53773, 52777, 51818, 50892, 50000, 49137, 47500,
|
||||
45967, 44531, 43181, 41911, 40714, 39583, 38513, 37500,
|
||||
36538, 35625, 34756, 33928, 33139, 32386, 31666, 30978,
|
||||
30319, 29687, 29081, 28500, 27941, 27403, 26886, 26388,
|
||||
25909, 25446, 25000, 24568, 23949, 23360, 22800, 22265,
|
||||
21755, 21268, 20802, 20357, 19930, 19520, 19127, 18750,
|
||||
18387, 18037, 17701, 17378, 17065, 16764, 16473, 16101,
|
||||
15745, 15405, 15079, 14766, 14467, 14179, 13902, 13636,
|
||||
13380, 13133, 12895, 12666, 12445, 12179, 11924, 11632,
|
||||
11445, 11220, 11003, 10795, 10594, 10401, 10215, 10035,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -324,6 +326,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi)
|
||||
cpi->frames_till_gf_update_due = cpi->goldfreq;
|
||||
|
||||
cpi->common.refresh_golden_frame = TRUE;
|
||||
cpi->common.refresh_alt_ref_frame = TRUE;
|
||||
}
|
||||
|
||||
void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi)
|
||||
@@ -1034,9 +1037,7 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
|
||||
gf_frame_useage = pct_gf_active;
|
||||
|
||||
// Is a fixed manual GF frequency being used
|
||||
if (!cpi->auto_gold)
|
||||
cpi->common.refresh_golden_frame = TRUE;
|
||||
else
|
||||
if (cpi->auto_gold)
|
||||
{
|
||||
// For one pass throw a GF if recent frame intra usage is low or the GF usage is high
|
||||
if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5))
|
||||
@@ -1549,12 +1550,21 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit,
|
||||
*frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
|
||||
}
|
||||
}
|
||||
// VBR
|
||||
// VBR and CQ mode
|
||||
// Note that tighter restrictions here can help quality but hurt encode speed
|
||||
else
|
||||
{
|
||||
*frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
|
||||
*frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
|
||||
// Strong overshoot limit for constrained quality
|
||||
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
|
||||
{
|
||||
*frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
|
||||
*frame_under_shoot_limit = cpi->this_frame_target * 2 / 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
*frame_over_shoot_limit = cpi->this_frame_target * 11 / 8;
|
||||
*frame_under_shoot_limit = cpi->this_frame_target * 5 / 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
1684
vp8/encoder/rdopt.c
1684
vp8/encoder/rdopt.c
File diff suppressed because it is too large
Load Diff
@@ -126,6 +126,24 @@ void vp8_sad16x16x3_c(
|
||||
sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
// Fills sad_array[0..7] with vp8_sad16x16_c() of the source block against the
// reference at ref_ptr + 0 .. ref_ptr + 7, each result truncated to unsigned short.
void vp8_sad16x16x8_c(
    const unsigned char *src_ptr,
    int  src_stride,
    const unsigned char *ref_ptr,
    int  ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    // 0x7fffffff is handed to every call unchanged (presumably "no early-out limit").
    for (shift = 0; shift < 8; shift++)
        sad_array[shift] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad16x8x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -139,6 +157,24 @@ void vp8_sad16x8x3_c(
|
||||
sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
// Fills sad_array[0..7] with vp8_sad16x8_c() of the source block against the
// reference at ref_ptr + 0 .. ref_ptr + 7, each result truncated to unsigned short.
void vp8_sad16x8x8_c(
    const unsigned char *src_ptr,
    int  src_stride,
    const unsigned char *ref_ptr,
    int  ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    // 0x7fffffff is handed to every call unchanged (presumably "no early-out limit").
    for (shift = 0; shift < 8; shift++)
        sad_array[shift] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad8x8x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -152,6 +188,24 @@ void vp8_sad8x8x3_c(
|
||||
sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
// Fills sad_array[0..7] with vp8_sad8x8_c() of the source block against the
// reference at ref_ptr + 0 .. ref_ptr + 7, each result truncated to unsigned short.
void vp8_sad8x8x8_c(
    const unsigned char *src_ptr,
    int  src_stride,
    const unsigned char *ref_ptr,
    int  ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    // 0x7fffffff is handed to every call unchanged (presumably "no early-out limit").
    for (shift = 0; shift < 8; shift++)
        sad_array[shift] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad8x16x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -165,6 +219,24 @@ void vp8_sad8x16x3_c(
|
||||
sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
// Fills sad_array[0..7] with vp8_sad8x16_c() of the source block against the
// reference at ref_ptr + 0 .. ref_ptr + 7, each result truncated to unsigned short.
void vp8_sad8x16x8_c(
    const unsigned char *src_ptr,
    int  src_stride,
    const unsigned char *ref_ptr,
    int  ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    // 0x7fffffff is handed to every call unchanged (presumably "no early-out limit").
    for (shift = 0; shift < 8; shift++)
        sad_array[shift] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad4x4x3_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
@@ -178,6 +250,24 @@ void vp8_sad4x4x3_c(
|
||||
sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
|
||||
}
|
||||
|
||||
// Fills sad_array[0..7] with vp8_sad4x4_c() of the source block against the
// reference at ref_ptr + 0 .. ref_ptr + 7, each result truncated to unsigned short.
void vp8_sad4x4x8_c(
    const unsigned char *src_ptr,
    int  src_stride,
    const unsigned char *ref_ptr,
    int  ref_stride,
    unsigned short *sad_array
)
{
    int shift;

    // 0x7fffffff is handed to every call unchanged (presumably "no early-out limit").
    for (shift = 0; shift < 8; shift++)
        sad_array[shift] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + shift, ref_stride, 0x7fffffff);
}
|
||||
|
||||
void vp8_sad16x16x4d_c(
|
||||
const unsigned char *src_ptr,
|
||||
int src_stride,
|
||||
|
@@ -37,29 +37,9 @@
|
||||
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
|
||||
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
|
||||
|
||||
#define USE_FILTER_LUT 1
|
||||
#if VP8_TEMPORAL_ALT_REF
|
||||
|
||||
#if USE_FILTER_LUT
|
||||
static int modifier_lut[7][19] =
|
||||
{
|
||||
// Strength=0
|
||||
{16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=1
|
||||
{16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=2
|
||||
{16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=3
|
||||
{16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=4
|
||||
{16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=5
|
||||
{16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
|
||||
// Strength=6
|
||||
{16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
|
||||
};
|
||||
#endif
|
||||
static void build_predictors_mb
|
||||
static void vp8_temporal_filter_predictors_mb_c
|
||||
(
|
||||
MACROBLOCKD *x,
|
||||
unsigned char *y_mb_ptr,
|
||||
@@ -79,14 +59,11 @@ static void build_predictors_mb
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
// vp8_sixtap_predict16x16_c(yptr, stride,
|
||||
// mv_col & 7, mv_row & 7, &pred[0], 16);
|
||||
x->subpixel_predict16x16(yptr, stride,
|
||||
mv_col & 7, mv_row & 7, &pred[0], 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
//vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
|
||||
RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
|
||||
}
|
||||
|
||||
@@ -111,7 +88,7 @@ static void build_predictors_mb
|
||||
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
|
||||
}
|
||||
}
|
||||
static void apply_temporal_filter
|
||||
void vp8_temporal_filter_apply_c
|
||||
(
|
||||
unsigned char *frame1,
|
||||
unsigned int stride,
|
||||
@@ -120,17 +97,13 @@ static void apply_temporal_filter
|
||||
int strength,
|
||||
int filter_weight,
|
||||
unsigned int *accumulator,
|
||||
unsigned int *count
|
||||
unsigned short *count
|
||||
)
|
||||
{
|
||||
int i, j, k;
|
||||
int modifier;
|
||||
int byte = 0;
|
||||
|
||||
#if USE_FILTER_LUT
|
||||
int *lut = modifier_lut[strength];
|
||||
#endif
|
||||
|
||||
for (i = 0,k = 0; i < block_size; i++)
|
||||
{
|
||||
for (j = 0; j < block_size; j++, k++)
|
||||
@@ -139,23 +112,19 @@ static void apply_temporal_filter
|
||||
int src_byte = frame1[byte];
|
||||
int pixel_value = *frame2++;
|
||||
|
||||
#if USE_FILTER_LUT
|
||||
// LUT implementation --
|
||||
// improves precision of filter
|
||||
modifier = abs(src_byte-pixel_value);
|
||||
modifier = modifier>18 ? 0 : lut[modifier];
|
||||
#else
|
||||
modifier = src_byte;
|
||||
modifier -= pixel_value;
|
||||
modifier = src_byte - pixel_value;
|
||||
// This is an integer approximation of:
|
||||
// float coeff = (3.0 * modifier * modifier) / pow(2, strength);
|
||||
// modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
|
||||
modifier *= modifier;
|
||||
modifier >>= strength;
|
||||
modifier *= 3;
|
||||
modifier += 1 << (strength - 1);
|
||||
modifier >>= strength;
|
||||
|
||||
if (modifier > 16)
|
||||
modifier = 16;
|
||||
|
||||
modifier = 16 - modifier;
|
||||
#endif
|
||||
modifier *= filter_weight;
|
||||
|
||||
count[k] += modifier;
|
||||
@@ -171,7 +140,7 @@ static void apply_temporal_filter
|
||||
#if ALT_REF_MC_ENABLED
|
||||
static int dummy_cost[2*mv_max+1];
|
||||
|
||||
static int find_matching_mb
|
||||
static int vp8_temporal_filter_find_matching_mb_c
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
YV12_BUFFER_CONFIG *arf_frame,
|
||||
@@ -246,7 +215,7 @@ static int find_matching_mb
|
||||
step_param,
|
||||
sadpb / 2/*x->errorperbit*/,
|
||||
&num00, &cpi->fn_ptr[BLOCK_16X16],
|
||||
mvsadcost, mvcost); //sadpb < 9
|
||||
mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9
|
||||
|
||||
// Further step/diamond searches as necessary
|
||||
n = 0;
|
||||
@@ -268,7 +237,7 @@ static int find_matching_mb
|
||||
step_param + n,
|
||||
sadpb / 4/*x->errorperbit*/,
|
||||
&num00, &cpi->fn_ptr[BLOCK_16X16],
|
||||
mvsadcost, mvcost); //sadpb = 9
|
||||
mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9
|
||||
|
||||
if (thissme < bestsme)
|
||||
{
|
||||
@@ -292,7 +261,7 @@ static int find_matching_mb
|
||||
bestsme = cpi->find_fractional_mv_step(x, b, d,
|
||||
&d->bmi.mv.as_mv, &best_ref_mv1,
|
||||
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
|
||||
cpi->mb.mvcost);
|
||||
mvcost);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -308,7 +277,7 @@ static int find_matching_mb
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vp8cx_temp_blur1_c
|
||||
static void vp8_temporal_filter_iterate_c
|
||||
(
|
||||
VP8_COMP *cpi,
|
||||
int frame_count,
|
||||
@@ -321,17 +290,17 @@ static void vp8cx_temp_blur1_c
|
||||
int mb_col, mb_row;
|
||||
unsigned int filter_weight[MAX_LAG_BUFFERS];
|
||||
unsigned char *mm_ptr = cpi->fp_motion_map;
|
||||
int cols = cpi->common.mb_cols;
|
||||
int rows = cpi->common.mb_rows;
|
||||
int mb_cols = cpi->common.mb_cols;
|
||||
int mb_rows = cpi->common.mb_rows;
|
||||
int MBs = cpi->common.MBs;
|
||||
int mb_y_offset = 0;
|
||||
int mb_uv_offset = 0;
|
||||
unsigned int accumulator[384];
|
||||
unsigned int count[384];
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
|
||||
MACROBLOCKD *mbd = &cpi->mb.e_mbd;
|
||||
YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
|
||||
unsigned char *dst1, *dst2;
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);
|
||||
|
||||
// Save input state
|
||||
unsigned char *y_buffer = mbd->pre.y_buffer;
|
||||
@@ -345,7 +314,7 @@ static void vp8cx_temp_blur1_c
|
||||
filter_weight[frame] = 1;
|
||||
}
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||
{
|
||||
#if ALT_REF_MC_ENABLED
|
||||
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
|
||||
@@ -354,14 +323,14 @@ static void vp8cx_temp_blur1_c
|
||||
+ (VP8BORDERINPIXELS - 19);
|
||||
#endif
|
||||
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
{
|
||||
int i, j, k, w;
|
||||
int weight_cap;
|
||||
int stride;
|
||||
|
||||
vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
|
||||
vpx_memset(count, 0, 384*sizeof(unsigned int));
|
||||
vpx_memset(count, 0, 384*sizeof(unsigned short));
|
||||
|
||||
#if ALT_REF_MC_ENABLED
|
||||
// Reduced search extent by 3 for 6-tap filter & smaller UMV border
|
||||
@@ -412,11 +381,12 @@ static void vp8cx_temp_blur1_c
|
||||
#define THRESH_HIGH 20000
|
||||
|
||||
// Correlation has been lost try MC
|
||||
err = find_matching_mb ( cpi,
|
||||
cpi->frames[alt_ref_index],
|
||||
cpi->frames[frame],
|
||||
mb_y_offset,
|
||||
THRESH_LOW );
|
||||
err = vp8_temporal_filter_find_matching_mb_c
|
||||
(cpi,
|
||||
cpi->frames[alt_ref_index],
|
||||
cpi->frames[frame],
|
||||
mb_y_offset,
|
||||
THRESH_LOW);
|
||||
|
||||
if (filter_weight[frame] < 2)
|
||||
{
|
||||
@@ -429,43 +399,46 @@ static void vp8cx_temp_blur1_c
|
||||
if (filter_weight[frame] != 0)
|
||||
{
|
||||
// Construct the predictors
|
||||
build_predictors_mb (
|
||||
mbd,
|
||||
cpi->frames[frame]->y_buffer + mb_y_offset,
|
||||
cpi->frames[frame]->u_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->v_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->y_stride,
|
||||
mbd->block[0].bmi.mv.as_mv.row,
|
||||
mbd->block[0].bmi.mv.as_mv.col,
|
||||
predictor );
|
||||
vp8_temporal_filter_predictors_mb_c
|
||||
(mbd,
|
||||
cpi->frames[frame]->y_buffer + mb_y_offset,
|
||||
cpi->frames[frame]->u_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->v_buffer + mb_uv_offset,
|
||||
cpi->frames[frame]->y_stride,
|
||||
mbd->block[0].bmi.mv.as_mv.row,
|
||||
mbd->block[0].bmi.mv.as_mv.col,
|
||||
predictor);
|
||||
|
||||
// Apply the filter (YUV)
|
||||
apply_temporal_filter ( f->y_buffer + mb_y_offset,
|
||||
f->y_stride,
|
||||
predictor,
|
||||
16,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator,
|
||||
count );
|
||||
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
|
||||
(f->y_buffer + mb_y_offset,
|
||||
f->y_stride,
|
||||
predictor,
|
||||
16,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator,
|
||||
count);
|
||||
|
||||
apply_temporal_filter ( f->u_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 256,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 256,
|
||||
count + 256 );
|
||||
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
|
||||
(f->u_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 256,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 256,
|
||||
count + 256);
|
||||
|
||||
apply_temporal_filter ( f->v_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 320,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 320,
|
||||
count + 320 );
|
||||
TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
|
||||
(f->v_buffer + mb_uv_offset,
|
||||
f->uv_stride,
|
||||
predictor + 320,
|
||||
8,
|
||||
strength,
|
||||
filter_weight[frame],
|
||||
accumulator + 320,
|
||||
count + 320);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -524,8 +497,8 @@ static void vp8cx_temp_blur1_c
|
||||
mb_uv_offset += 8;
|
||||
}
|
||||
|
||||
mb_y_offset += 16*f->y_stride-f->y_width;
|
||||
mb_uv_offset += 8*f->uv_stride-f->uv_width;
|
||||
mb_y_offset += 16*(f->y_stride-mb_cols);
|
||||
mb_uv_offset += 8*(f->uv_stride-mb_cols);
|
||||
}
|
||||
|
||||
// Restore input state
|
||||
@@ -534,7 +507,7 @@ static void vp8cx_temp_blur1_c
|
||||
mbd->pre.v_buffer = v_buffer;
|
||||
}
|
||||
|
||||
void vp8cx_temp_filter_c
|
||||
void vp8_temporal_filter_prepare_c
|
||||
(
|
||||
VP8_COMP *cpi
|
||||
)
|
||||
@@ -642,7 +615,7 @@ void vp8cx_temp_filter_c
|
||||
= &cpi->src_buffer[which_buffer].source_buffer;
|
||||
}
|
||||
|
||||
vp8cx_temp_blur1_c (
|
||||
vp8_temporal_filter_iterate_c (
|
||||
cpi,
|
||||
frames_to_blur,
|
||||
frames_to_blur_backward,
|
||||
|
@@ -12,8 +12,37 @@
|
||||
#ifndef __INC_VP8_TEMPORAL_FILTER_H
|
||||
#define __INC_VP8_TEMPORAL_FILTER_H
|
||||
|
||||
#include "onyx_int.h"
|
||||
#define prototype_apply(sym)\
|
||||
void (sym) \
|
||||
( \
|
||||
unsigned char *frame1, \
|
||||
unsigned int stride, \
|
||||
unsigned char *frame2, \
|
||||
unsigned int block_size, \
|
||||
int strength, \
|
||||
int filter_weight, \
|
||||
unsigned int *accumulator, \
|
||||
unsigned short *count \
|
||||
)
|
||||
|
||||
void vp8cx_temp_filter_c(VP8_COMP *cpi);
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "x86/temporal_filter_x86.h"
|
||||
#endif
|
||||
|
||||
#ifndef vp8_temporal_filter_apply
|
||||
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
|
||||
#endif
|
||||
extern prototype_apply(vp8_temporal_filter_apply);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
prototype_apply(*apply);
|
||||
} vp8_temporal_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn
|
||||
#else
|
||||
#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_filter_##fn
|
||||
#endif
|
||||
|
||||
#endif // __INC_VP8_TEMPORAL_FILTER_H
|
||||
|
@@ -132,8 +132,6 @@ static void tokenize2nd_order_b
|
||||
t->Token = x;
|
||||
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
|
||||
|
||||
t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0);
|
||||
|
||||
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
|
||||
|
||||
++cpi->coef_counts [type] [band] [pt] [x];
|
||||
@@ -185,7 +183,6 @@ static void tokenize1st_order_b
|
||||
t->Token = x;
|
||||
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
|
||||
|
||||
t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0);
|
||||
t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0));
|
||||
|
||||
++cpi->coef_counts [type] [band] [pt] [x];
|
||||
@@ -434,7 +431,6 @@ static __inline void stuff2nd_order_b
|
||||
|
||||
t->Token = DCT_EOB_TOKEN;
|
||||
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
|
||||
t->section = 11;
|
||||
t->skip_eob_node = 0;
|
||||
++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
|
||||
++t;
|
||||
@@ -465,7 +461,6 @@ static __inline void stuff1st_order_b
|
||||
|
||||
t->Token = DCT_EOB_TOKEN;
|
||||
t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
|
||||
t->section = 8;
|
||||
t->skip_eob_node = 0;
|
||||
++cpi->coef_counts [0] [1] [pt] [DCT_EOB_TOKEN];
|
||||
++t;
|
||||
@@ -495,7 +490,6 @@ void stuff1st_order_buv
|
||||
|
||||
t->Token = DCT_EOB_TOKEN;
|
||||
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
|
||||
t->section = 13;
|
||||
t->skip_eob_node = 0;
|
||||
++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];
|
||||
++t;
|
||||
|
@@ -25,11 +25,10 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int Token;
|
||||
int Extra;
|
||||
const vp8_prob *context_tree;
|
||||
int skip_eob_node;
|
||||
int section;
|
||||
short Extra;
|
||||
unsigned char Token;
|
||||
unsigned char skip_eob_node;
|
||||
} TOKENEXTRA;
|
||||
|
||||
int rd_cost_mby(MACROBLOCKD *);
|
||||
|
@@ -32,6 +32,16 @@
|
||||
unsigned int *sad_array\
|
||||
)
|
||||
|
||||
#define prototype_sad_multi_same_address_1(sym)\
|
||||
void (sym)\
|
||||
(\
|
||||
const unsigned char *src_ptr, \
|
||||
int source_stride, \
|
||||
const unsigned char *ref_ptr, \
|
||||
int ref_stride, \
|
||||
unsigned short *sad_array\
|
||||
)
|
||||
|
||||
#define prototype_sad_multi_dif_address(sym)\
|
||||
void (sym)\
|
||||
(\
|
||||
@@ -138,6 +148,31 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
|
||||
|
||||
#ifndef vp8_variance_sad16x16x8
|
||||
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
|
||||
|
||||
#ifndef vp8_variance_sad16x8x8
|
||||
#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
|
||||
|
||||
#ifndef vp8_variance_sad8x8x8
|
||||
#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
|
||||
|
||||
#ifndef vp8_variance_sad8x16x8
|
||||
#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
|
||||
|
||||
#ifndef vp8_variance_sad4x4x8
|
||||
#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
|
||||
#endif
|
||||
extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
|
||||
|
||||
//-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|
||||
#ifndef vp8_variance_sad16x16x4d
|
||||
@@ -274,6 +309,7 @@ extern prototype_sad(vp8_variance_get4x4sse_cs);
|
||||
|
||||
typedef prototype_sad(*vp8_sad_fn_t);
|
||||
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
|
||||
typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
|
||||
typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
|
||||
typedef prototype_variance(*vp8_variance_fn_t);
|
||||
typedef prototype_variance2(*vp8_variance2_fn_t);
|
||||
@@ -317,6 +353,12 @@ typedef struct
|
||||
vp8_sad_multi_fn_t sad8x8x3;
|
||||
vp8_sad_multi_fn_t sad4x4x3;
|
||||
|
||||
vp8_sad_multi1_fn_t sad16x16x8;
|
||||
vp8_sad_multi1_fn_t sad16x8x8;
|
||||
vp8_sad_multi1_fn_t sad8x16x8;
|
||||
vp8_sad_multi1_fn_t sad8x8x8;
|
||||
vp8_sad_multi1_fn_t sad4x4x8;
|
||||
|
||||
vp8_sad_multi_d_fn_t sad16x16x4d;
|
||||
vp8_sad_multi_d_fn_t sad16x8x4d;
|
||||
vp8_sad_multi_d_fn_t sad8x16x4d;
|
||||
@@ -334,6 +376,7 @@ typedef struct
|
||||
vp8_variance_fn_t svf_halfpix_v;
|
||||
vp8_variance_fn_t svf_halfpix_hv;
|
||||
vp8_sad_multi_fn_t sdx3f;
|
||||
vp8_sad_multi1_fn_t sdx8f;
|
||||
vp8_sad_multi_d_fn_t sdx4df;
|
||||
} vp8_variance_fn_ptr_t;
|
||||
|
||||
|
@@ -11,511 +11,231 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
section .text
|
||||
global sym(vp8_short_fdct4x4_mmx)
|
||||
global sym(vp8_short_fdct8x4_wmt)
|
||||
|
||||
|
||||
%define DCTCONSTANTSBITS (16)
|
||||
%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1))
|
||||
%define x_c1 (60547) ; cos(pi /8) * (1<<15)
|
||||
%define x_c2 (46341) ; cos(pi*2/8) * (1<<15)
|
||||
%define x_c3 (25080) ; cos(pi*3/8) * (1<<15)
|
||||
|
||||
|
||||
;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_mmx)
|
||||
sym(vp8_short_fdct4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
mov rsi, arg(0) ;input
|
||||
mov rdi, arg(1) ;output
|
||||
|
||||
lea rdx, [GLOBAL(dct_const_mmx)]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
mov rsi, arg(0) ; input
|
||||
mov rdi, arg(1) ; output
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
; read the input data
|
||||
movq mm0, [rsi]
|
||||
movq mm1, [rsi + rax ]
|
||||
movq mm0, [rsi]
|
||||
movq mm1, [rsi + rax]
|
||||
|
||||
movq mm2, [rcx]
|
||||
movq mm3, [rcx + rax]
|
||||
; get the constants
|
||||
;shift left by 3 for precision
|
||||
psllw mm0, 3
|
||||
psllw mm1, 3
|
||||
movq mm2, [rcx]
|
||||
movq mm4, [rcx + rax]
|
||||
|
||||
psllw mm2, 3
|
||||
psllw mm3, 3
|
||||
; transpose for the first stage
|
||||
movq mm3, mm0 ; 00 01 02 03
|
||||
movq mm5, mm2 ; 20 21 22 23
|
||||
|
||||
; transpose for the second stage
|
||||
movq mm4, mm0 ; 00 01 02 03
|
||||
movq mm5, mm2 ; 10 11 12 03
|
||||
punpcklwd mm0, mm1 ; 00 10 01 11
|
||||
punpckhwd mm3, mm1 ; 02 12 03 13
|
||||
|
||||
punpcklwd mm0, mm1 ; 00 10 01 11
|
||||
punpckhwd mm4, mm1 ; 02 12 03 13
|
||||
punpcklwd mm2, mm4 ; 20 30 21 31
|
||||
punpckhwd mm5, mm4 ; 22 32 23 33
|
||||
|
||||
punpcklwd mm2, mm3 ; 20 30 21 31
|
||||
punpckhwd mm5, mm3 ; 22 32 23 33
|
||||
movq mm1, mm0 ; 00 10 01 11
|
||||
punpckldq mm0, mm2 ; 00 10 20 30
|
||||
|
||||
punpckhdq mm1, mm2 ; 01 11 21 31
|
||||
|
||||
movq mm1, mm0 ; 00 10 01 11
|
||||
punpckldq mm0, mm2 ; 00 10 20 30
|
||||
movq mm2, mm3 ; 02 12 03 13
|
||||
punpckldq mm2, mm5 ; 02 12 22 32
|
||||
|
||||
punpckhdq mm1, mm2 ; 01 11 21 31
|
||||
|
||||
movq mm2, mm4 ; 02 12 03 13
|
||||
punpckldq mm2, mm5 ; 02 12 22 32
|
||||
|
||||
punpckhdq mm4, mm5 ; 03 13 23 33
|
||||
movq mm3, mm4
|
||||
punpckhdq mm3, mm5 ; 03 13 23 33
|
||||
|
||||
; mm0 0
|
||||
; mm1 1
|
||||
; mm2 2
|
||||
; mm3 3
|
||||
|
||||
; first stage
|
||||
movq mm5, mm0
|
||||
movq mm4, mm1
|
||||
movq mm5, mm0
|
||||
movq mm4, mm1
|
||||
|
||||
paddw mm0, mm3 ; a = 0 + 3
|
||||
paddw mm1, mm2 ; b = 1 + 2
|
||||
paddw mm0, mm3 ; a1 = 0 + 3
|
||||
paddw mm1, mm2 ; b1 = 1 + 2
|
||||
|
||||
psubw mm4, mm2 ; c = 1 - 2
|
||||
psubw mm5, mm3 ; d = 0 - 3
|
||||
psubw mm4, mm2 ; c1 = 1 - 2
|
||||
psubw mm5, mm3 ; d1 = 0 - 3
|
||||
|
||||
psllw mm5, 3
|
||||
psllw mm4, 3
|
||||
|
||||
psllw mm0, 3
|
||||
psllw mm1, 3
|
||||
|
||||
; output 0 and 2
|
||||
movq mm6, [rdx + 16] ; c2
|
||||
movq mm2, mm0 ; a
|
||||
movq mm2, mm0 ; a1
|
||||
|
||||
paddw mm0, mm1 ; a + b
|
||||
psubw mm2, mm1 ; a - b
|
||||
|
||||
movq mm1, mm0 ; a + b
|
||||
pmulhw mm0, mm6 ; 00 01 02 03
|
||||
|
||||
paddw mm0, mm1 ; output 00 01 02 03
|
||||
pmulhw mm6, mm2 ; 20 21 22 23
|
||||
|
||||
paddw mm2, mm6 ; output 20 21 22 23
|
||||
paddw mm0, mm1 ; op[0] = a1 + b1
|
||||
psubw mm2, mm1 ; op[2] = a1 - b1
|
||||
|
||||
; output 1 and 3
|
||||
movq mm6, [rdx + 8] ; c1
|
||||
movq mm7, [rdx + 24] ; c3
|
||||
; interleave c1, d1
|
||||
movq mm1, mm5 ; d1
|
||||
punpcklwd mm1, mm4 ; c1 d1
|
||||
punpckhwd mm5, mm4 ; c1 d1
|
||||
|
||||
movq mm1, mm4 ; c
|
||||
movq mm3, mm5 ; d
|
||||
movq mm3, mm1
|
||||
movq mm4, mm5
|
||||
|
||||
pmulhw mm1, mm7 ; c * c3
|
||||
pmulhw mm3, mm6 ; d * c1
|
||||
pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
|
||||
paddw mm3, mm5 ; d * c1 rounded
|
||||
paddw mm1, mm3 ; output 10 11 12 13
|
||||
pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
|
||||
movq mm3, mm4 ; c
|
||||
pmulhw mm5, mm7 ; d * c3
|
||||
paddd mm1, MMWORD PTR[GLOBAL(_14500)]
|
||||
paddd mm4, MMWORD PTR[GLOBAL(_14500)]
|
||||
paddd mm3, MMWORD PTR[GLOBAL(_7500)]
|
||||
paddd mm5, MMWORD PTR[GLOBAL(_7500)]
|
||||
|
||||
pmulhw mm4, mm6 ; c * c1
|
||||
paddw mm3, mm4 ; round c* c1
|
||||
|
||||
psubw mm5, mm3 ; output 30 31 32 33
|
||||
movq mm3, mm5
|
||||
psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
|
||||
packssdw mm1, mm4 ; op[1]
|
||||
packssdw mm3, mm5 ; op[3]
|
||||
|
||||
; done with vertical
|
||||
; transpose for the second stage
|
||||
movq mm4, mm0 ; 00 01 02 03
|
||||
movq mm5, mm2 ; 10 11 12 03
|
||||
movq mm4, mm0 ; 00 10 20 30
|
||||
movq mm5, mm2 ; 02 12 22 32
|
||||
|
||||
punpcklwd mm0, mm1 ; 00 10 01 11
|
||||
punpckhwd mm4, mm1 ; 02 12 03 13
|
||||
punpcklwd mm0, mm1 ; 00 01 10 11
|
||||
punpckhwd mm4, mm1 ; 20 21 30 31
|
||||
|
||||
punpcklwd mm2, mm3 ; 20 30 21 31
|
||||
punpckhwd mm5, mm3 ; 22 32 23 33
|
||||
punpcklwd mm2, mm3 ; 02 03 12 13
|
||||
punpckhwd mm5, mm3 ; 22 23 32 33
|
||||
|
||||
movq mm1, mm0 ; 00 01 10 11
|
||||
punpckldq mm0, mm2 ; 00 01 02 03
|
||||
|
||||
movq mm1, mm0 ; 00 10 01 11
|
||||
punpckldq mm0, mm2 ; 00 10 20 30
|
||||
punpckhdq mm1, mm2 ; 10 11 12 13
|
||||
|
||||
punpckhdq mm1, mm2 ; 01 11 21 31
|
||||
movq mm2, mm4 ; 20 21 30 31
|
||||
punpckldq mm2, mm5 ; 20 21 22 23
|
||||
|
||||
movq mm2, mm4 ; 02 12 03 13
|
||||
punpckldq mm2, mm5 ; 02 12 22 32
|
||||
punpckhdq mm4, mm5 ; 30 31 32 33
|
||||
|
||||
punpckhdq mm4, mm5 ; 03 13 23 33
|
||||
movq mm3, mm4
|
||||
; mm0 0
|
||||
; mm1 1
|
||||
; mm2 2
|
||||
; mm4 3
|
||||
|
||||
movq mm5, mm0
|
||||
movq mm3, mm1
|
||||
|
||||
; first stage
|
||||
movq mm5, mm0
|
||||
movq mm4, mm1
|
||||
paddw mm0, mm4 ; a1 = 0 + 3
|
||||
paddw mm1, mm2 ; b1 = 1 + 2
|
||||
|
||||
paddw mm0, mm3 ; a = 0 + 3
|
||||
paddw mm1, mm2 ; b = 1 + 2
|
||||
psubw mm3, mm2 ; c1 = 1 - 2
|
||||
psubw mm5, mm4 ; d1 = 0 - 3
|
||||
|
||||
psubw mm4, mm2 ; c = 1 - 2
|
||||
psubw mm5, mm3 ; d = 0 - 3
|
||||
pxor mm6, mm6 ; zero out for compare
|
||||
|
||||
pcmpeqw mm6, mm5 ; d1 != 0
|
||||
|
||||
pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper,
|
||||
; and keep bit 0 of lower
|
||||
|
||||
; output 0 and 2
|
||||
movq mm6, [rdx + 16] ; c2
|
||||
movq mm2, mm0 ; a
|
||||
paddw mm0, mm1 ; a + b
|
||||
movq mm2, mm0 ; a1
|
||||
|
||||
psubw mm2, mm1 ; a - b
|
||||
paddw mm0, mm1 ; a1 + b1
|
||||
psubw mm2, mm1 ; a1 - b1
|
||||
|
||||
movq mm1, mm0 ; a + b
|
||||
pmulhw mm0, mm6 ; 00 01 02 03
|
||||
paddw mm0, MMWORD PTR[GLOBAL(_7w)]
|
||||
paddw mm2, MMWORD PTR[GLOBAL(_7w)]
|
||||
|
||||
paddw mm0, mm1 ; output 00 01 02 03
|
||||
pmulhw mm6, mm2 ; 20 21 22 23
|
||||
|
||||
paddw mm2, mm6 ; output 20 21 22 23
|
||||
psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4
|
||||
psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4
|
||||
|
||||
movq MMWORD PTR[rdi + 0 ], mm0
|
||||
movq MMWORD PTR[rdi + 16], mm2
|
||||
|
||||
; output 1 and 3
|
||||
movq mm6, [rdx + 8] ; c1
|
||||
movq mm7, [rdx + 24] ; c3
|
||||
; interleave c1, d1
|
||||
movq mm1, mm5 ; d1
|
||||
punpcklwd mm1, mm3 ; c1 d1
|
||||
punpckhwd mm5, mm3 ; c1 d1
|
||||
|
||||
movq mm1, mm4 ; c
|
||||
movq mm3, mm5 ; d
|
||||
movq mm3, mm1
|
||||
movq mm4, mm5
|
||||
|
||||
pmulhw mm1, mm7 ; c * c3
|
||||
pmulhw mm3, mm6 ; d * c1
|
||||
pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
|
||||
paddw mm3, mm5 ; d * c1 rounded
|
||||
paddw mm1, mm3 ; output 10 11 12 13
|
||||
pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
|
||||
movq mm3, mm4 ; c
|
||||
pmulhw mm5, mm7 ; d * c3
|
||||
paddd mm1, MMWORD PTR[GLOBAL(_12000)]
|
||||
paddd mm4, MMWORD PTR[GLOBAL(_12000)]
|
||||
paddd mm3, MMWORD PTR[GLOBAL(_51000)]
|
||||
paddd mm5, MMWORD PTR[GLOBAL(_51000)]
|
||||
|
||||
pmulhw mm4, mm6 ; c * c1
|
||||
paddw mm3, mm4 ; round c* c1
|
||||
psrad mm1, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
|
||||
psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
|
||||
psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
|
||||
psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
|
||||
|
||||
psubw mm5, mm3 ; output 30 31 32 33
|
||||
movq mm3, mm5
|
||||
; done with vertical
|
||||
packssdw mm1, mm4 ; op[4]
|
||||
packssdw mm3, mm5 ; op[12]
|
||||
|
||||
pcmpeqw mm4, mm4
|
||||
pcmpeqw mm5, mm5
|
||||
psrlw mm4, 15
|
||||
psrlw mm5, 15
|
||||
paddw mm1, mm6 ; op[4] += (d1!=0)
|
||||
|
||||
psllw mm4, 2
|
||||
psllw mm5, 2
|
||||
movq MMWORD PTR[rdi + 8 ], mm1
|
||||
movq MMWORD PTR[rdi + 24], mm3
|
||||
|
||||
paddw mm0, mm4
|
||||
paddw mm1, mm5
|
||||
paddw mm2, mm4
|
||||
paddw mm3, mm5
|
||||
|
||||
psraw mm0, 3
|
||||
psraw mm1, 3
|
||||
psraw mm2, 3
|
||||
psraw mm3, 3
|
||||
|
||||
movq [rdi ], mm0
|
||||
movq [rdi+ 8], mm1
|
||||
movq [rdi+16], mm2
|
||||
movq [rdi+24], mm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
|
||||
sym(vp8_short_fdct8x4_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
mov rsi, arg(0) ;input
|
||||
mov rdi, arg(1) ;output
|
||||
|
||||
lea rdx, [GLOBAL(dct_const_xmm)]
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
|
||||
lea rcx, [rsi + rax*2]
|
||||
; read the input data
|
||||
movdqa xmm0, [rsi]
|
||||
movdqa xmm2, [rsi + rax]
|
||||
|
||||
movdqa xmm4, [rcx]
|
||||
movdqa xmm3, [rcx + rax]
|
||||
; get the constants
|
||||
;shift left by 3 for precision
|
||||
psllw xmm0, 3
|
||||
psllw xmm2, 3
|
||||
|
||||
psllw xmm4, 3
|
||||
psllw xmm3, 3
|
||||
|
||||
; transpose for the first stage
|
||||
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
|
||||
punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
|
||||
|
||||
punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
|
||||
punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
|
||||
|
||||
movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
|
||||
punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
|
||||
|
||||
punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
|
||||
movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
|
||||
punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
|
||||
|
||||
punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
|
||||
movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
|
||||
punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
|
||||
punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
|
||||
|
||||
punpckhqdq xmm1, xmm4 ; 01 11 21 31 05 15 25 35
|
||||
|
||||
; xmm0 0
|
||||
; xmm1 1
|
||||
; xmm2 2
|
||||
; xmm3 3
|
||||
|
||||
; first stage
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm4, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a = 0 + 3
|
||||
paddw xmm1, xmm2 ; b = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c = 1 - 2
|
||||
psubw xmm5, xmm3 ; d = 0 - 3
|
||||
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm6, [rdx + 32] ; c2
|
||||
movdqa xmm2, xmm0 ; a
|
||||
|
||||
paddw xmm0, xmm1 ; a + b
|
||||
psubw xmm2, xmm1 ; a - b
|
||||
|
||||
movdqa xmm1, xmm0 ; a + b
|
||||
pmulhw xmm0, xmm6 ; 00 01 02 03
|
||||
|
||||
paddw xmm0, xmm1 ; output 00 01 02 03
|
||||
pmulhw xmm6, xmm2 ; 20 21 22 23
|
||||
|
||||
paddw xmm2, xmm6 ; output 20 21 22 23
|
||||
|
||||
; output 1 and 3
|
||||
movdqa xmm6, [rdx + 16] ; c1
|
||||
movdqa xmm7, [rdx + 48] ; c3
|
||||
|
||||
movdqa xmm1, xmm4 ; c
|
||||
movdqa xmm3, xmm5 ; d
|
||||
|
||||
pmulhw xmm1, xmm7 ; c * c3
|
||||
pmulhw xmm3, xmm6 ; d * c1
|
||||
|
||||
paddw xmm3, xmm5 ; d * c1 rounded
|
||||
paddw xmm1, xmm3 ; output 10 11 12 13
|
||||
|
||||
movdqa xmm3, xmm4 ; c
|
||||
pmulhw xmm5, xmm7 ; d * c3
|
||||
|
||||
pmulhw xmm4, xmm6 ; c * c1
|
||||
paddw xmm3, xmm4 ; round c* c1
|
||||
|
||||
psubw xmm5, xmm3 ; output 30 31 32 33
|
||||
movdqa xmm3, xmm5
|
||||
|
||||
|
||||
; done with vertical
|
||||
; transpose for the second stage
|
||||
movdqa xmm4, xmm2 ; 02 12 22 32 06 16 26 36
|
||||
movdqa xmm2, xmm1 ; 01 11 21 31 05 15 25 35
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 10 20 30 04 14 24 34
|
||||
movdqa xmm5, xmm4 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
punpcklwd xmm0, xmm2 ; 00 01 10 11 20 21 30 31
|
||||
punpckhwd xmm1, xmm2 ; 04 05 14 15 24 25 34 35
|
||||
|
||||
punpcklwd xmm4, xmm3 ; 02 03 12 13 22 23 32 33
|
||||
punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
|
||||
|
||||
movdqa xmm2, xmm0 ; 00 01 10 11 20 21 30 31
|
||||
punpckldq xmm0, xmm4 ; 00 01 02 03 10 11 12 13
|
||||
|
||||
punpckhdq xmm2, xmm4 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
|
||||
movdqa xmm4, xmm1 ; 04 05 14 15 24 25 34 35
|
||||
punpckldq xmm4, xmm5 ; 04 05 06 07 14 15 16 17
|
||||
|
||||
punpckhdq xmm1, xmm5 ; 24 25 26 27 34 35 36 37
|
||||
movdqa xmm3, xmm2 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
punpckhqdq xmm3, xmm1 ; 30 31 32 33 34 35 36 37
|
||||
punpcklqdq xmm2, xmm1 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 01 02 03 10 11 12 13
|
||||
punpcklqdq xmm0, xmm4 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
punpckhqdq xmm1, xmm4 ; 10 11 12 13 14 15 16 17
|
||||
|
||||
; first stage
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm4, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a = 0 + 3
|
||||
paddw xmm1, xmm2 ; b = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c = 1 - 2
|
||||
psubw xmm5, xmm3 ; d = 0 - 3
|
||||
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm6, [rdx + 32] ; c2
|
||||
movdqa xmm2, xmm0 ; a
|
||||
|
||||
paddw xmm0, xmm1 ; a + b
|
||||
psubw xmm2, xmm1 ; a - b
|
||||
|
||||
movdqa xmm1, xmm0 ; a + b
|
||||
pmulhw xmm0, xmm6 ; 00 01 02 03
|
||||
|
||||
paddw xmm0, xmm1 ; output 00 01 02 03
|
||||
pmulhw xmm6, xmm2 ; 20 21 22 23
|
||||
|
||||
paddw xmm2, xmm6 ; output 20 21 22 23
|
||||
|
||||
; output 1 and 3
|
||||
movdqa xmm6, [rdx + 16] ; c1
|
||||
movdqa xmm7, [rdx + 48] ; c3
|
||||
|
||||
movdqa xmm1, xmm4 ; c
|
||||
movdqa xmm3, xmm5 ; d
|
||||
|
||||
pmulhw xmm1, xmm7 ; c * c3
|
||||
pmulhw xmm3, xmm6 ; d * c1
|
||||
|
||||
paddw xmm3, xmm5 ; d * c1 rounded
|
||||
paddw xmm1, xmm3 ; output 10 11 12 13
|
||||
|
||||
movdqa xmm3, xmm4 ; c
|
||||
pmulhw xmm5, xmm7 ; d * c3
|
||||
|
||||
pmulhw xmm4, xmm6 ; c * c1
|
||||
paddw xmm3, xmm4 ; round c* c1
|
||||
|
||||
psubw xmm5, xmm3 ; output 30 31 32 33
|
||||
movdqa xmm3, xmm5
|
||||
; done with vertical
|
||||
|
||||
|
||||
pcmpeqw xmm4, xmm4
|
||||
pcmpeqw xmm5, xmm5;
|
||||
psrlw xmm4, 15
|
||||
psrlw xmm5, 15
|
||||
|
||||
psllw xmm4, 2
|
||||
psllw xmm5, 2
|
||||
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm5
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm5
|
||||
|
||||
psraw xmm0, 3
|
||||
psraw xmm1, 3
|
||||
psraw xmm2, 3
|
||||
psraw xmm3, 3
|
||||
|
||||
movq QWORD PTR[rdi ], xmm0
|
||||
movq QWORD PTR[rdi+ 8], xmm1
|
||||
movq QWORD PTR[rdi+16], xmm2
|
||||
movq QWORD PTR[rdi+24], xmm3
|
||||
|
||||
psrldq xmm0, 8
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm2, 8
|
||||
psrldq xmm3, 8
|
||||
|
||||
movq QWORD PTR[rdi+32], xmm0
|
||||
movq QWORD PTR[rdi+40], xmm1
|
||||
movq QWORD PTR[rdi+48], xmm2
|
||||
movq QWORD PTR[rdi+56], xmm3
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
;static const unsigned int dct1st_stage_rounding_mmx[2] =
|
||||
align 16
|
||||
dct1st_stage_rounding_mmx:
|
||||
times 2 dd 8192
|
||||
|
||||
|
||||
;static const unsigned int dct2nd_stage_rounding_mmx[2] =
|
||||
align 16
|
||||
dct2nd_stage_rounding_mmx:
|
||||
times 2 dd 32768
|
||||
|
||||
|
||||
;static const short dct_matrix[4][4]=
|
||||
align 16
|
||||
dct_matrix:
|
||||
times 4 dw 23170
|
||||
|
||||
dw 30274
|
||||
dw 12540
|
||||
dw -12540
|
||||
dw -30274
|
||||
|
||||
dw 23170
|
||||
times 2 dw -23170
|
||||
dw 23170
|
||||
|
||||
dw 12540
|
||||
dw -30274
|
||||
dw 30274
|
||||
dw -12540
|
||||
|
||||
|
||||
;static const unsigned short dct_const_mmx[4 * 4]=
|
||||
align 16
|
||||
dct_const_mmx:
|
||||
times 4 dw 0
|
||||
times 4 dw 60547
|
||||
times 4 dw 46341
|
||||
times 4 dw 25080
|
||||
|
||||
|
||||
;static const unsigned short dct_const_xmm[8 * 4]=
|
||||
align 16
|
||||
dct_const_xmm:
|
||||
times 8 dw 0
|
||||
times 8 dw 60547
|
||||
times 8 dw 46341
|
||||
times 8 dw 25080
|
||||
align 8
|
||||
_5352_2217:
|
||||
dw 5352
|
||||
dw 2217
|
||||
dw 5352
|
||||
dw 2217
|
||||
align 8
|
||||
_2217_neg5352:
|
||||
dw 2217
|
||||
dw -5352
|
||||
dw 2217
|
||||
dw -5352
|
||||
align 8
|
||||
_cmp_mask:
|
||||
times 4 dw 1
|
||||
align 8
|
||||
_7w:
|
||||
times 4 dw 7
|
||||
align 8
|
||||
_14500:
|
||||
times 2 dd 14500
|
||||
align 8
|
||||
_7500:
|
||||
times 2 dd 7500
|
||||
align 8
|
||||
_12000:
|
||||
times 2 dd 12000
|
||||
align 8
|
||||
_51000:
|
||||
times 2 dd 51000
|
||||
|
@@ -11,32 +11,68 @@
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_sse2)
|
||||
sym(vp8_short_fdct4x4_sse2):
|
||||
%macro STACK_FRAME_CREATE 0
|
||||
%if ABI_IS_32BIT
|
||||
%define input rsi
|
||||
%define output rdi
|
||||
%define pitch rax
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
;; SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0)
|
||||
movsxd rax, DWORD PTR arg(2)
|
||||
lea rdi, [rsi + rax*2]
|
||||
mov rdi, arg(1)
|
||||
|
||||
movq xmm0, MMWORD PTR[rsi ] ;03 02 01 00
|
||||
movq xmm2, MMWORD PTR[rsi + rax] ;13 12 11 10
|
||||
movq xmm1, MMWORD PTR[rsi + rax*2] ;23 22 21 20
|
||||
movq xmm3, MMWORD PTR[rdi + rax] ;33 32 31 30
|
||||
movsxd rax, dword ptr arg(2)
|
||||
lea rcx, [rsi + rax*2]
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
%define input rcx
|
||||
%define output rdx
|
||||
%define pitch r8
|
||||
%else
|
||||
%define input rdi
|
||||
%define output rsi
|
||||
%define pitch rdx
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY 0
|
||||
%define input
|
||||
%define output
|
||||
%define pitch
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
pop rbp
|
||||
%else
|
||||
%ifidn __OUTPUT_FORMAT__,x64
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct4x4_sse2)
|
||||
sym(vp8_short_fdct4x4_sse2):
|
||||
|
||||
STACK_FRAME_CREATE
|
||||
|
||||
movq xmm0, MMWORD PTR[input ] ;03 02 01 00
|
||||
movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10
|
||||
lea input, [input+2*pitch]
|
||||
movq xmm1, MMWORD PTR[input ] ;23 22 21 20
|
||||
movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30
|
||||
|
||||
punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00
|
||||
punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20
|
||||
|
||||
mov rdi, arg(1)
|
||||
|
||||
movdqa xmm2, xmm0
|
||||
punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00
|
||||
punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10
|
||||
@@ -51,6 +87,7 @@ sym(vp8_short_fdct4x4_sse2):
|
||||
psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1
|
||||
psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3
|
||||
psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
|
||||
@@ -121,17 +158,216 @@ sym(vp8_short_fdct4x4_sse2):
|
||||
punpcklqdq xmm0, xmm3 ;op[4] op[0]
|
||||
punpckhqdq xmm1, xmm3 ;op[12] op[8]
|
||||
|
||||
movdqa XMMWORD PTR[rdi + 0], xmm0
|
||||
movdqa XMMWORD PTR[rdi + 16], xmm1
|
||||
movdqa XMMWORD PTR[output + 0], xmm0
|
||||
movdqa XMMWORD PTR[output + 16], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
;; RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
STACK_FRAME_DESTROY
|
||||
|
||||
;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
|
||||
global sym(vp8_short_fdct8x4_sse2)
|
||||
sym(vp8_short_fdct8x4_sse2):
|
||||
|
||||
STACK_FRAME_CREATE
|
||||
|
||||
; read the input data
|
||||
movdqa xmm0, [input ]
|
||||
movdqa xmm2, [input+ pitch]
|
||||
lea input, [input+2*pitch]
|
||||
movdqa xmm4, [input ]
|
||||
movdqa xmm3, [input+ pitch]
|
||||
|
||||
; transpose for the first stage
|
||||
movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13
|
||||
punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17
|
||||
|
||||
punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33
|
||||
punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37
|
||||
|
||||
movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13
|
||||
punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31
|
||||
|
||||
punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17
|
||||
punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35
|
||||
|
||||
punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37
|
||||
movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33
|
||||
|
||||
punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37
|
||||
punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31
|
||||
punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34
|
||||
|
||||
punpckhqdq xmm1, xmm4 ; 01 11 21 31 05 15 25 35
|
||||
|
||||
; xmm0 0
|
||||
; xmm1 1
|
||||
; xmm2 2
|
||||
; xmm3 3
|
||||
|
||||
; first stage
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm4, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a1 = 0 + 3
|
||||
paddw xmm1, xmm2 ; b1 = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c1 = 1 - 2
|
||||
psubw xmm5, xmm3 ; d1 = 0 - 3
|
||||
|
||||
psllw xmm5, 3
|
||||
psllw xmm4, 3
|
||||
|
||||
psllw xmm0, 3
|
||||
psllw xmm1, 3
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm2, xmm0 ; a1
|
||||
|
||||
paddw xmm0, xmm1 ; op[0] = a1 + b1
|
||||
psubw xmm2, xmm1 ; op[2] = a1 - b1
|
||||
|
||||
; output 1 and 3
|
||||
; interleave c1, d1
|
||||
movdqa xmm1, xmm5 ; d1
|
||||
punpcklwd xmm1, xmm4 ; c1 d1
|
||||
punpckhwd xmm5, xmm4 ; c1 d1
|
||||
|
||||
movdqa xmm3, xmm1
|
||||
movdqa xmm4, xmm5
|
||||
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
|
||||
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
|
||||
paddd xmm1, XMMWORD PTR[GLOBAL(_14500)]
|
||||
paddd xmm4, XMMWORD PTR[GLOBAL(_14500)]
|
||||
paddd xmm3, XMMWORD PTR[GLOBAL(_7500)]
|
||||
paddd xmm5, XMMWORD PTR[GLOBAL(_7500)]
|
||||
|
||||
psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12
|
||||
psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12
|
||||
|
||||
packssdw xmm1, xmm4 ; op[1]
|
||||
packssdw xmm3, xmm5 ; op[3]
|
||||
|
||||
; done with vertical
|
||||
; transpose for the second stage
|
||||
movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34
|
||||
movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36
|
||||
|
||||
punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31
|
||||
punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35
|
||||
|
||||
punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33
|
||||
punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37
|
||||
|
||||
movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31
|
||||
punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13
|
||||
|
||||
punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35
|
||||
punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17
|
||||
|
||||
punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37
|
||||
movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33
|
||||
|
||||
punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37
|
||||
punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27
|
||||
|
||||
movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13
|
||||
punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17
|
||||
|
||||
; xmm0 0
|
||||
; xmm4 1
|
||||
; xmm1 2
|
||||
; xmm3 3
|
||||
|
||||
movdqa xmm5, xmm0
|
||||
movdqa xmm2, xmm1
|
||||
|
||||
paddw xmm0, xmm3 ; a1 = 0 + 3
|
||||
paddw xmm1, xmm4 ; b1 = 1 + 2
|
||||
|
||||
psubw xmm4, xmm2 ; c1 = 1 - 2
|
||||
psubw xmm5, xmm3 ; d1 = 0 - 3
|
||||
|
||||
pxor xmm6, xmm6 ; zero out for compare
|
||||
|
||||
pcmpeqw xmm6, xmm5 ; d1 != 0
|
||||
|
||||
pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper,
|
||||
; and keep bit 0 of lower
|
||||
|
||||
; output 0 and 2
|
||||
movdqa xmm2, xmm0 ; a1
|
||||
|
||||
paddw xmm0, xmm1 ; a1 + b1
|
||||
psubw xmm2, xmm1 ; a1 - b1
|
||||
|
||||
paddw xmm0, XMMWORD PTR[GLOBAL(_7w)]
|
||||
paddw xmm2, XMMWORD PTR[GLOBAL(_7w)]
|
||||
|
||||
psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4
|
||||
psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4
|
||||
|
||||
; output 1 and 3
|
||||
; interleave c1, d1
|
||||
movdqa xmm1, xmm5 ; d1
|
||||
punpcklwd xmm1, xmm4 ; c1 d1
|
||||
punpckhwd xmm5, xmm4 ; c1 d1
|
||||
|
||||
movdqa xmm3, xmm1
|
||||
movdqa xmm4, xmm5
|
||||
|
||||
pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352
|
||||
|
||||
pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352
|
||||
|
||||
paddd xmm1, XMMWORD PTR[GLOBAL(_12000)]
|
||||
paddd xmm4, XMMWORD PTR[GLOBAL(_12000)]
|
||||
paddd xmm3, XMMWORD PTR[GLOBAL(_51000)]
|
||||
paddd xmm5, XMMWORD PTR[GLOBAL(_51000)]
|
||||
|
||||
psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
|
||||
psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 12000)>>16
|
||||
psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
|
||||
psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 51000)>>16
|
||||
|
||||
packssdw xmm1, xmm4 ; op[4]
|
||||
packssdw xmm3, xmm5 ; op[12]
|
||||
|
||||
paddw xmm1, xmm6 ; op[4] += (d1!=0)
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
movdqa xmm5, xmm2
|
||||
|
||||
punpcklqdq xmm0, xmm1
|
||||
punpckhqdq xmm4, xmm1
|
||||
|
||||
punpcklqdq xmm2, xmm3
|
||||
punpckhqdq xmm5, xmm3
|
||||
|
||||
movdqa XMMWORD PTR[output + 0 ], xmm0
|
||||
movdqa XMMWORD PTR[output + 16], xmm2
|
||||
movdqa XMMWORD PTR[output + 32], xmm4
|
||||
movdqa XMMWORD PTR[output + 48], xmm5
|
||||
|
||||
STACK_FRAME_DESTROY
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
@@ -161,7 +397,9 @@ align 16
|
||||
_cmp_mask:
|
||||
times 4 dw 1
|
||||
times 4 dw 0
|
||||
|
||||
align 16
|
||||
_cmp_mask8x4:
|
||||
times 8 dw 1
|
||||
align 16
|
||||
_mult_sub:
|
||||
dw 1
|
||||
@@ -176,6 +414,9 @@ align 16
|
||||
_7:
|
||||
times 4 dd 7
|
||||
align 16
|
||||
_7w:
|
||||
times 8 dw 7
|
||||
align 16
|
||||
_14500:
|
||||
times 4 dd 14500
|
||||
align 16
|
||||
|
@@ -24,33 +24,31 @@ extern prototype_fdct(vp8_short_fdct4x4_mmx);
|
||||
extern prototype_fdct(vp8_short_fdct8x4_mmx);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#if 0
|
||||
|
||||
#undef vp8_fdct_short4x4
|
||||
#define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
|
||||
|
||||
#undef vp8_fdct_short8x4
|
||||
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_SSE2
|
||||
extern prototype_fdct(vp8_short_fdct8x4_wmt);
|
||||
extern prototype_fdct(vp8_short_fdct8x4_sse2);
|
||||
extern prototype_fdct(vp8_short_walsh4x4_sse2);
|
||||
|
||||
extern prototype_fdct(vp8_short_fdct4x4_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#if 1
|
||||
/* short SSE2 DCT currently disabled, does not match the MMX version */
|
||||
|
||||
#undef vp8_fdct_short4x4
|
||||
#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
|
||||
|
||||
#undef vp8_fdct_short8x4
|
||||
#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
|
||||
#endif
|
||||
|
||||
#undef vp8_fdct_fast4x4
|
||||
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
|
||||
@@ -58,7 +56,7 @@ extern prototype_fdct(vp8_short_fdct4x4_sse2);
|
||||
#undef vp8_fdct_fast8x4
|
||||
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
|
||||
|
||||
#undef vp8_fdct_walsh_short4x4
|
||||
#undef vp8_fdct_walsh_short4x4
|
||||
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2
|
||||
|
||||
#endif
|
||||
|
@@ -17,6 +17,7 @@ sym(vp8_short_walsh4x4_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
@@ -143,6 +144,7 @@ sym(vp8_short_walsh4x4_sse2):
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@@ -24,5 +24,14 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
#undef vp8_search_full_search
|
||||
#define vp8_search_full_search vp8_full_search_sadx8
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -1,298 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "memory.h"
|
||||
#include "preproc.h"
|
||||
#include "pragmas.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Macros
|
||||
****************************************************************************/
|
||||
#define FRAMECOUNT 7
|
||||
/* Round X up to the next multiple of 32.
 * The argument is parenthesized so a compound expression is cast as a whole,
 * and the mask is built at unsigned-long width (~31UL) so that bits above
 * bit 31 are preserved on targets where unsigned long is 64 bits wide.
 * The result type is still unsigned long, as before. */
#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ~31UL )
|
||||
|
||||
/****************************************************************************
|
||||
* Imports
|
||||
****************************************************************************/
|
||||
extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
****************************************************************************/
|
||||
void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : temp_filter_wmt
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* unsigned char *s : Pointer to source frame.
|
||||
* unsigned char *d : Pointer to destination frame.
|
||||
* int bytes : Number of bytes to filter.
|
||||
* int strength : Strength of filter to apply.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs a closeness-adjusted temporal blur
|
||||
*
|
||||
* SPECIAL NOTES : Destination frame can be same as source frame.
|
||||
*
|
||||
****************************************************************************/
|
||||
void temp_filter_wmt
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
)
|
||||
{
|
||||
int byte = 0;
|
||||
unsigned char *frameptr = ppi->frame_buffer;
|
||||
|
||||
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
|
||||
__declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16, 16, 16, 16, 16};
|
||||
|
||||
if (ppi->frame == 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
int i;
|
||||
int frame = 0;
|
||||
|
||||
do
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
*frameptr = s[byte+i];
|
||||
++frameptr;
|
||||
}
|
||||
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
d[byte+i] = s[byte+i];
|
||||
|
||||
byte += 8;
|
||||
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int offset2 = (ppi->frame % FRAMECOUNT);
|
||||
|
||||
do
|
||||
{
|
||||
__declspec(align(16)) unsigned short counts[8];
|
||||
__declspec(align(16)) unsigned short sums[8];
|
||||
__asm
|
||||
{
|
||||
mov eax, offset2
|
||||
mov edi, s // source pixels
|
||||
pxor xmm1, xmm1 // accumulator
|
||||
|
||||
pxor xmm7, xmm7
|
||||
|
||||
mov esi, frameptr // accumulator
|
||||
pxor xmm2, xmm2 // count
|
||||
|
||||
movq xmm3, QWORD PTR [edi]
|
||||
|
||||
movq QWORD PTR [esi+8*eax], xmm3
|
||||
|
||||
punpcklbw xmm3, xmm2 // xmm3 source pixels
|
||||
mov ecx, FRAMECOUNT
|
||||
|
||||
next_frame:
|
||||
movq xmm4, QWORD PTR [esi] // get frame buffer values
|
||||
punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
|
||||
movdqa xmm6, xmm4 // save the pixel values
|
||||
psubsw xmm4, xmm3 // subtracted pixel values
|
||||
pmullw xmm4, xmm4 // square xmm4
|
||||
movd xmm5, strength
|
||||
psrlw xmm4, xmm5 // should be strength
|
||||
pmullw xmm4, threes // 3 * modifier
|
||||
movdqa xmm5, sixteens // 16s
|
||||
psubusw xmm5, xmm4 // 16 - modifiers
|
||||
movdqa xmm4, xmm5 // save the modifiers
|
||||
pmullw xmm4, xmm6 // multiplier values
|
||||
paddusw xmm1, xmm4 // accumulator
|
||||
paddusw xmm2, xmm5 // count
|
||||
add esi, 8 // next frame
|
||||
dec ecx // next set of eight pixels
|
||||
jnz next_frame
|
||||
|
||||
movdqa counts, xmm2
|
||||
psrlw xmm2, 1 // divide count by 2 for rounding
|
||||
paddusw xmm1, xmm2 // rounding added in
|
||||
|
||||
mov frameptr, esi
|
||||
|
||||
movdqa sums, xmm1
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
|
||||
blurvalue >>= 16;
|
||||
d[i] = blurvalue;
|
||||
}
|
||||
|
||||
s += 8;
|
||||
d += 8;
|
||||
byte += 8;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
|
||||
++ppi->frame;
|
||||
__asm emms
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : temp_filter_mmx
|
||||
*
|
||||
* INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
|
||||
* unsigned char *s : Pointer to source frame.
|
||||
* unsigned char *d : Pointer to destination frame.
|
||||
* int bytes : Number of bytes to filter.
|
||||
* int strength : Strength of filter to apply.
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs a closeness-adjusted temporal blur
|
||||
*
|
||||
* SPECIAL NOTES : Destination frame can be same as source frame.
|
||||
*
|
||||
****************************************************************************/
|
||||
void temp_filter_mmx
|
||||
(
|
||||
pre_proc_instance *ppi,
|
||||
unsigned char *s,
|
||||
unsigned char *d,
|
||||
int bytes,
|
||||
int strength
|
||||
)
|
||||
{
|
||||
int byte = 0;
|
||||
unsigned char *frameptr = ppi->frame_buffer;
|
||||
|
||||
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3};
|
||||
__declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16};
|
||||
|
||||
if (ppi->frame == 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
int i;
|
||||
int frame = 0;
|
||||
|
||||
do
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
*frameptr = s[byte+i];
|
||||
++frameptr;
|
||||
}
|
||||
|
||||
++frame;
|
||||
}
|
||||
while (frame < FRAMECOUNT);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
d[byte+i] = s[byte+i];
|
||||
|
||||
byte += 4;
|
||||
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int offset2 = (ppi->frame % FRAMECOUNT);
|
||||
|
||||
do
|
||||
{
|
||||
__declspec(align(16)) unsigned short counts[8];
|
||||
__declspec(align(16)) unsigned short sums[8];
|
||||
__asm
|
||||
{
|
||||
|
||||
mov eax, offset2
|
||||
mov edi, s // source pixels
|
||||
pxor mm1, mm1 // accumulator
|
||||
pxor mm7, mm7
|
||||
|
||||
mov esi, frameptr // accumulator
|
||||
pxor mm2, mm2 // count
|
||||
|
||||
movd mm3, DWORD PTR [edi]
|
||||
movd DWORD PTR [esi+4*eax], mm3
|
||||
|
||||
punpcklbw mm3, mm2 // mm3 source pixels
|
||||
mov ecx, FRAMECOUNT
|
||||
|
||||
next_frame:
|
||||
movd mm4, DWORD PTR [esi] // get frame buffer values
|
||||
punpcklbw mm4, mm7 // mm4 frame buffer pixels
|
||||
movq mm6, mm4 // save the pixel values
|
||||
psubsw mm4, mm3 // subtracted pixel values
|
||||
pmullw mm4, mm4 // square mm4
|
||||
movd mm5, strength
|
||||
psrlw mm4, mm5 // should be strength
|
||||
pmullw mm4, threes // 3 * modifier
|
||||
movq mm5, sixteens // 16s
|
||||
psubusw mm5, mm4 // 16 - modifiers
|
||||
movq mm4, mm5 // save the modifiers
|
||||
pmullw mm4, mm6 // multiplier values
|
||||
paddusw mm1, mm4 // accumulator
|
||||
paddusw mm2, mm5 // count
|
||||
add esi, 4 // next frame
|
||||
dec ecx // next set of eight pixels
|
||||
jnz next_frame
|
||||
|
||||
movq counts, mm2
|
||||
psrlw mm2, 1 // divide count by 2 for rounding
|
||||
paddusw mm1, mm2 // rounding added in
|
||||
|
||||
mov frameptr, esi
|
||||
|
||||
movq sums, mm1
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
int blurvalue = sums[i] * ppi->fixed_divide[counts[i]];
|
||||
blurvalue >>= 16;
|
||||
d[i] = blurvalue;
|
||||
}
|
||||
|
||||
s += 4;
|
||||
d += 4;
|
||||
byte += 4;
|
||||
}
|
||||
while (byte < bytes);
|
||||
}
|
||||
|
||||
++ppi->frame;
|
||||
__asm emms
|
||||
}
|
@@ -253,10 +253,9 @@ rq_zigzag_1c:
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
|
||||
; short *qcoeff_ptr,short *dequant_ptr,
|
||||
; short *scan_mask, short *round_ptr,
|
||||
; short *inv_scan_order, short *round_ptr,
|
||||
; short *quant_ptr, short *dqcoeff_ptr);
|
||||
global sym(vp8_fast_quantize_b_impl_sse2)
|
||||
sym(vp8_fast_quantize_b_impl_sse2):
|
||||
@@ -265,32 +264,18 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
; end prolog
|
||||
|
||||
ALIGN_STACK 16, rax
|
||||
|
||||
%define save_xmm6 0
|
||||
%define save_xmm7 16
|
||||
|
||||
%define vp8_fastquantizeb_stack_size save_xmm7 + 16
|
||||
|
||||
sub rsp, vp8_fastquantizeb_stack_size
|
||||
|
||||
movdqa XMMWORD PTR[rsp + save_xmm6], xmm6
|
||||
movdqa XMMWORD PTR[rsp + save_xmm7], xmm7
|
||||
|
||||
mov rdx, arg(0) ;coeff_ptr
|
||||
mov rcx, arg(2) ;dequant_ptr
|
||||
mov rax, arg(3) ;scan_mask
|
||||
mov rdi, arg(4) ;round_ptr
|
||||
mov rsi, arg(5) ;quant_ptr
|
||||
|
||||
movdqa xmm0, XMMWORD PTR[rdx]
|
||||
movdqa xmm4, XMMWORD PTR[rdx + 16]
|
||||
|
||||
movdqa xmm6, XMMWORD PTR[rdi] ;round lo
|
||||
movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi
|
||||
movdqa xmm2, XMMWORD PTR[rdi] ;round lo
|
||||
movdqa xmm3, XMMWORD PTR[rdi + 16] ;round hi
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
@@ -303,8 +288,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
psubw xmm1, xmm0 ;x = abs(z)
|
||||
psubw xmm5, xmm4 ;x = abs(z)
|
||||
|
||||
paddw xmm1, xmm6
|
||||
paddw xmm5, xmm7
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm3
|
||||
|
||||
pmulhw xmm1, XMMWORD PTR[rsi]
|
||||
pmulhw xmm5, XMMWORD PTR[rsi + 16]
|
||||
@@ -312,8 +297,8 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
mov rdi, arg(1) ;qcoeff_ptr
|
||||
mov rsi, arg(6) ;dqcoeff_ptr
|
||||
|
||||
movdqa xmm6, XMMWORD PTR[rcx]
|
||||
movdqa xmm7, XMMWORD PTR[rcx + 16]
|
||||
movdqa xmm2, XMMWORD PTR[rcx]
|
||||
movdqa xmm3, XMMWORD PTR[rcx + 16]
|
||||
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
@@ -323,64 +308,47 @@ sym(vp8_fast_quantize_b_impl_sse2):
|
||||
movdqa XMMWORD PTR[rdi], xmm1
|
||||
movdqa XMMWORD PTR[rdi + 16], xmm5
|
||||
|
||||
pmullw xmm6, xmm1
|
||||
pmullw xmm7, xmm5
|
||||
pmullw xmm2, xmm1
|
||||
pmullw xmm3, xmm5
|
||||
|
||||
movdqa xmm2, XMMWORD PTR[rax]
|
||||
movdqa xmm3, XMMWORD PTR[rax+16];
|
||||
mov rdi, arg(3) ;inv_scan_order
|
||||
|
||||
pxor xmm4, xmm4 ;clear all bits
|
||||
; Start with 16
|
||||
pxor xmm4, xmm4 ;clear all bits
|
||||
pcmpeqw xmm1, xmm4
|
||||
pcmpeqw xmm5, xmm4
|
||||
|
||||
pcmpeqw xmm4, xmm4 ;set all bits
|
||||
pcmpeqw xmm4, xmm4 ;set all bits
|
||||
pxor xmm1, xmm4
|
||||
pxor xmm5, xmm4
|
||||
|
||||
psrlw xmm1, 15
|
||||
psrlw xmm5, 15
|
||||
pand xmm1, XMMWORD PTR[rdi]
|
||||
pand xmm5, XMMWORD PTR[rdi+16]
|
||||
|
||||
pmaddwd xmm1, xmm2
|
||||
pmaddwd xmm5, xmm3
|
||||
pmaxsw xmm1, xmm5
|
||||
|
||||
movq xmm2, xmm1
|
||||
movq xmm3, xmm5
|
||||
; now down to 8
|
||||
pshufd xmm5, xmm1, 00001110b
|
||||
|
||||
psrldq xmm1, 8
|
||||
psrldq xmm5, 8
|
||||
pmaxsw xmm1, xmm5
|
||||
|
||||
paddd xmm1, xmm5
|
||||
paddd xmm2, xmm3
|
||||
; only 4 left
|
||||
pshuflw xmm5, xmm1, 00001110b
|
||||
|
||||
paddd xmm1, xmm2
|
||||
movq xmm5, xmm1
|
||||
pmaxsw xmm1, xmm5
|
||||
|
||||
psrldq xmm1, 4
|
||||
paddd xmm5, xmm1
|
||||
; okay, just 2!
|
||||
pshuflw xmm5, xmm1, 00000001b
|
||||
|
||||
movq rcx, xmm5
|
||||
and rcx, 0xffff
|
||||
pmaxsw xmm1, xmm5
|
||||
|
||||
xor rdx, rdx
|
||||
sub rdx, rcx
|
||||
movd rax, xmm1
|
||||
and rax, 0xff
|
||||
|
||||
bsr rax, rcx
|
||||
inc rax
|
||||
|
||||
sar rdx, 31
|
||||
and rax, rdx
|
||||
|
||||
movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff
|
||||
movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff
|
||||
|
||||
movdqa xmm6, XMMWORD PTR[rsp + save_xmm6]
|
||||
movdqa xmm7, XMMWORD PTR[rsp + save_xmm7]
|
||||
|
||||
add rsp, vp8_fastquantizeb_stack_size
|
||||
pop rsp
|
||||
movdqa XMMWORD PTR[rsi], xmm2 ;store dqcoeff
|
||||
movdqa XMMWORD PTR[rsi + 16], xmm3 ;store dqcoeff
|
||||
|
||||
; begin epilog
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
|
114
vp8/encoder/x86/quantize_ssse3.asm
Executable file
114
vp8/encoder/x86/quantize_ssse3.asm
Executable file
@@ -0,0 +1,114 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
|
||||
; short *qcoeff_ptr,short *dequant_ptr,
|
||||
; short *round_ptr,
|
||||
; short *quant_ptr, short *dqcoeff_ptr);
|
||||
;
|
||||
global sym(vp8_fast_quantize_b_impl_ssse3)
|
||||
sym(vp8_fast_quantize_b_impl_ssse3):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(0) ;coeff_ptr
|
||||
mov rdi, arg(3) ;round_ptr
|
||||
mov rsi, arg(4) ;quant_ptr
|
||||
|
||||
movdqa xmm0, [rdx]
|
||||
movdqa xmm4, [rdx + 16]
|
||||
|
||||
movdqa xmm2, [rdi] ;round lo
|
||||
movdqa xmm3, [rdi + 16] ;round hi
|
||||
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm5, xmm4
|
||||
|
||||
psraw xmm0, 15 ;sign of z (aka sz)
|
||||
psraw xmm4, 15 ;sign of z (aka sz)
|
||||
|
||||
pabsw xmm1, xmm1
|
||||
pabsw xmm5, xmm5
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm5, xmm3
|
||||
|
||||
pmulhw xmm1, [rsi]
|
||||
pmulhw xmm5, [rsi + 16]
|
||||
|
||||
mov rdi, arg(1) ;qcoeff_ptr
|
||||
mov rcx, arg(2) ;dequant_ptr
|
||||
mov rsi, arg(5) ;dqcoeff_ptr
|
||||
|
||||
pxor xmm1, xmm0
|
||||
pxor xmm5, xmm4
|
||||
psubw xmm1, xmm0
|
||||
psubw xmm5, xmm4
|
||||
|
||||
movdqa [rdi], xmm1
|
||||
movdqa [rdi + 16], xmm5
|
||||
|
||||
movdqa xmm2, [rcx]
|
||||
movdqa xmm3, [rcx + 16]
|
||||
|
||||
pxor xmm4, xmm4
|
||||
pmullw xmm2, xmm1
|
||||
pmullw xmm3, xmm5
|
||||
|
||||
pcmpeqw xmm1, xmm4 ;non zero mask
|
||||
pcmpeqw xmm5, xmm4 ;non zero mask
|
||||
packsswb xmm1, xmm5
|
||||
pshufb xmm1, [ GLOBAL(zz_shuf)]
|
||||
|
||||
pmovmskb edx, xmm1
|
||||
|
||||
; xor ecx, ecx
|
||||
; mov eax, -1
|
||||
;find_eob_loop:
|
||||
; shr edx, 1
|
||||
; jc fq_skip
|
||||
; mov eax, ecx
|
||||
;fq_skip:
|
||||
; inc ecx
|
||||
; cmp ecx, 16
|
||||
; jne find_eob_loop
|
||||
xor rdi, rdi
|
||||
mov eax, -1
|
||||
xor dx, ax ;flip the bits for bsr
|
||||
bsr eax, edx
|
||||
|
||||
movdqa [rsi], xmm2 ;store dqcoeff
|
||||
movdqa [rsi + 16], xmm3 ;store dqcoeff
|
||||
|
||||
sub edi, edx ;check for all zeros in bit mask
|
||||
sar edi, 31 ;0 or -1
|
||||
add eax, 1
|
||||
and eax, edi ;if the bit mask was all zero,
|
||||
;then eob = 0
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
zz_shuf:
|
||||
db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
|
File diff suppressed because it is too large
Load Diff
353
vp8/encoder/x86/sad_sse4.asm
Normal file
353
vp8/encoder/x86/sad_sse4.asm
Normal file
@@ -0,0 +1,353 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro PROCESS_16X2X8 1
|
||||
%if %1
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm1, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm1, xmm2
|
||||
paddw xmm1, xmm3
|
||||
paddw xmm1, xmm4
|
||||
%else
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
movq xmm2, MMWORD PTR [rdi+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movdqa xmm0, XMMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
movq xmm2, MMWORD PTR [rdi+ rdx+16]
|
||||
punpcklqdq xmm5, xmm3
|
||||
punpcklqdq xmm3, xmm2
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm4, xmm3
|
||||
mpsadbw xmm3, xmm0, 0x0
|
||||
mpsadbw xmm4, xmm0, 0x5
|
||||
|
||||
paddw xmm5, xmm2
|
||||
paddw xmm5, xmm3
|
||||
paddw xmm5, xmm4
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_8X2X8 1
|
||||
%if %1
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm1, xmm3
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm1, xmm2
|
||||
%else
|
||||
movq xmm0, MMWORD PTR [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movq xmm0, MMWORD PTR [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa xmm2, xmm5
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
mpsadbw xmm2, xmm0, 0x5
|
||||
paddw xmm5, xmm2
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
%macro PROCESS_4X2X8 1
|
||||
%if %1
|
||||
movd xmm0, [rsi]
|
||||
movq xmm1, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm1, xmm3
|
||||
|
||||
mpsadbw xmm1, xmm0, 0x0
|
||||
%else
|
||||
movd xmm0, [rsi]
|
||||
movq xmm5, MMWORD PTR [rdi]
|
||||
movq xmm3, MMWORD PTR [rdi+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endif
|
||||
movd xmm0, [rsi + rax]
|
||||
movq xmm5, MMWORD PTR [rdi+ rdx]
|
||||
movq xmm3, MMWORD PTR [rdi+ rdx+8]
|
||||
punpcklqdq xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
mpsadbw xmm5, xmm0, 0x0
|
||||
|
||||
paddw xmm1, xmm5
|
||||
%endmacro
|
||||
|
||||
|
||||
;void vp8_sad16x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array);
|
||||
global sym(vp8_sad16x16x8_sse4)
|
||||
sym(vp8_sad16x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad16x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad16x8x8_sse4)
|
||||
sym(vp8_sad16x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_16X2X8 1
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
PROCESS_16X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad8x8x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x8x8_sse4)
|
||||
sym(vp8_sad8x8x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad8x16x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad8x16x8_sse4)
|
||||
sym(vp8_sad8x16x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_8X2X8 1
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
PROCESS_8X2X8 0
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_sad4x4x8_sse4(
|
||||
; const unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; const unsigned char *ref_ptr,
|
||||
; int ref_stride,
|
||||
; unsigned short *sad_array
|
||||
;);
|
||||
global sym(vp8_sad4x4x8_sse4)
|
||||
sym(vp8_sad4x4x8_sse4):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;ref_stride
|
||||
|
||||
PROCESS_4X2X8 1
|
||||
PROCESS_4X2X8 0
|
||||
|
||||
mov rdi, arg(4) ;Results
|
||||
movdqu XMMWORD PTR [rdi], xmm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
@@ -77,6 +77,7 @@ sym(vp8_subtract_mby_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
@@ -138,6 +139,7 @@ submby_loop:
|
||||
pop rsi
|
||||
; begin epilog
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
207
vp8/encoder/x86/temporal_filter_apply_sse2.asm
Normal file
207
vp8/encoder/x86/temporal_filter_apply_sse2.asm
Normal file
@@ -0,0 +1,207 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; void vp8_temporal_filter_apply_sse2 | arg
|
||||
; (unsigned char *frame1, | 0
|
||||
; unsigned int stride, | 1
|
||||
; unsigned char *frame2, | 2
|
||||
; unsigned int block_size, | 3
|
||||
; int strength, | 4
|
||||
; int filter_weight, | 5
|
||||
; unsigned int *accumulator, | 6
|
||||
; unsigned short *count) | 7
|
||||
; Applies one frame's contribution to a temporal filter.
;
; For every pixel the code below computes
;     modifier = (3 * (src - pred)^2 + rounding) >> strength
;     modifier = (16 - modifier, saturated at 0) * filter_weight
;     count[k]       += modifier                (16-bit lanes)
;     accumulator[k] += modifier * pred[k]      (32-bit lanes)
;
; The predictor (frame2) is read as contiguous 16-byte groups with aligned
; loads; count and accumulator are also accessed with aligned loads/stores.
; When block_size is 8 the loop covers 8*8 predictor bytes and consumes two
; 8-pixel source rows per iteration; otherwise it covers 16*16 predictor
; bytes and consumes one 16-pixel source row per iteration.
global sym(vp8_temporal_filter_apply_sse2)
sym(vp8_temporal_filter_apply_sse2):

    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 8
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ALIGN_STACK 16, rax
    ; 16-byte-aligned scratch area; offsets are relative to rsp
    %define block_size    0
    %define strength      16
    %define filter_weight 32
    %define rounding_bit  48
    %define rbp_backup    64
    %define stack_size    80
    sub         rsp,            stack_size
    ; rbp is reused as the stride register below, so keep a copy
    mov         [rsp + rbp_backup], rbp
    ; end prolog

    mov         rdx,            arg(3)
    mov         [rsp + block_size], rdx
    movd        xmm6,           arg(4)
    movdqa      [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read

    ; calculate the rounding bit outside the loop
    ; 0x8000 >> (16 - strength)
    mov         rdx,            16
    sub         rdx,            arg(4) ; 16 - strength
    movd        xmm4,           rdx ; can't use rdx w/ shift
    movdqa      xmm5,           [GLOBAL(_const_top_bit)]
    psrlw       xmm5,           xmm4
    movdqa      [rsp + rounding_bit], xmm5

    mov         rsi,            arg(0) ; src/frame1
    mov         rdx,            arg(2) ; predictor frame
    mov         rdi,            arg(6) ; accumulator
    mov         rax,            arg(7) ; count

    ; dup the filter weight into all eight words and store for later
    movd        xmm0,           arg(5) ; filter_weight
    pshuflw     xmm0,           xmm0, 0
    punpcklwd   xmm0,           xmm0
    movdqa      [rsp + filter_weight], xmm0

    ; last arg() access: rbp holds the stride from here until the epilog
    mov         rbp,            arg(1) ; stride
    pxor        xmm7,           xmm7 ; zero for extraction

    ; rcx = end of predictor data (16x16 by default, 8x8 when block_size == 8)
    lea         rcx,            [rdx + 16*16*1]
    cmp         dword ptr [rsp + block_size], 8
    jne         temporal_filter_apply_load_16
    lea         rcx,            [rdx + 8*8*1]

temporal_filter_apply_load_8:
    movq        xmm0,           [rsi] ; first row
    lea         rsi,            [rsi + rbp] ; += stride
    punpcklbw   xmm0,           xmm7 ; src[ 0- 7]
    movq        xmm1,           [rsi] ; second row
    lea         rsi,            [rsi + rbp] ; += stride
    punpcklbw   xmm1,           xmm7 ; src[ 8-15]
    jmp         temporal_filter_apply_load_finished

temporal_filter_apply_load_16:
    movdqu      xmm0,           [rsi] ; src (frame1)
    lea         rsi,            [rsi + rbp] ; += stride
    movdqa      xmm1,           xmm0
    punpcklbw   xmm0,           xmm7 ; src[ 0- 7]
    punpckhbw   xmm1,           xmm7 ; src[ 8-15]

temporal_filter_apply_load_finished:
    movdqa      xmm2,           [rdx] ; predictor (frame2)
    movdqa      xmm3,           xmm2
    punpcklbw   xmm2,           xmm7 ; pred[ 0- 7]
    punpckhbw   xmm3,           xmm7 ; pred[ 8-15]

    ; modifier = src_byte - pixel_value
    psubw       xmm0,           xmm2 ; src - pred[ 0- 7]
    psubw       xmm1,           xmm3 ; src - pred[ 8-15]

    ; modifier *= modifier
    pmullw      xmm0,           xmm0 ; modifier[ 0- 7]^2
    pmullw      xmm1,           xmm1 ; modifier[ 8-15]^2

    ; modifier *= 3
    pmullw      xmm0,           [GLOBAL(_const_3w)]
    pmullw      xmm1,           [GLOBAL(_const_3w)]

    ; modifier += 0x8000 >> (16 - strength)
    paddw       xmm0,           [rsp + rounding_bit]
    paddw       xmm1,           [rsp + rounding_bit]

    ; modifier >>= strength
    psrlw       xmm0,           [rsp + strength]
    psrlw       xmm1,           [rsp + strength]

    ; modifier = 16 - modifier
    ; saturation takes care of modifier > 16
    movdqa      xmm3,           [GLOBAL(_const_16w)]
    movdqa      xmm2,           [GLOBAL(_const_16w)]
    psubusw     xmm3,           xmm1
    psubusw     xmm2,           xmm0

    ; modifier *= filter_weight
    pmullw      xmm2,           [rsp + filter_weight]
    pmullw      xmm3,           [rsp + filter_weight]

    ; count (16-bit entries, so word adds are the right width here)
    movdqa      xmm4,           [rax]
    movdqa      xmm5,           [rax+16]
    ; += modifier
    paddw       xmm4,           xmm2
    paddw       xmm5,           xmm3
    ; write back
    movdqa      [rax],          xmm4
    movdqa      [rax+16],       xmm5
    lea         rax,            [rax + 16*2] ; count += 16*(sizeof(short))

    ; load and extract the predictor up to shorts
    pxor        xmm7,           xmm7
    movdqa      xmm0,           [rdx]
    lea         rdx,            [rdx + 16*1] ; pred += 16*(sizeof(char))
    movdqa      xmm1,           xmm0
    punpcklbw   xmm0,           xmm7 ; pred[ 0- 7]
    punpckhbw   xmm1,           xmm7 ; pred[ 8-15]

    ; modifier *= pixel_value
    pmullw      xmm0,           xmm2
    pmullw      xmm1,           xmm3

    ; expand to double words (zero-extend each 16-bit product)
    movdqa      xmm2,           xmm0
    punpcklwd   xmm0,           xmm7 ; [ 0- 3]
    punpckhwd   xmm2,           xmm7 ; [ 4- 7]
    movdqa      xmm3,           xmm1
    punpcklwd   xmm1,           xmm7 ; [ 8-11]
    punpckhwd   xmm3,           xmm7 ; [12-15]

    ; accumulator (32-bit entries)
    movdqa      xmm4,           [rdi]
    movdqa      xmm5,           [rdi+16]
    movdqa      xmm6,           [rdi+32]
    movdqa      xmm7,           [rdi+48]
    ; += modifier
    ; dword adds: a word add would drop the carry out of the low 16 bits of
    ; each 32-bit accumulator entry once its running sum exceeds 0xFFFF
    paddd       xmm4,           xmm0
    paddd       xmm5,           xmm2
    paddd       xmm6,           xmm1
    paddd       xmm7,           xmm3
    ; write back
    movdqa      [rdi],          xmm4
    movdqa      [rdi+16],       xmm5
    movdqa      [rdi+32],       xmm6
    movdqa      [rdi+48],       xmm7
    lea         rdi,            [rdi + 16*4] ; accumulator += 16*(sizeof(int))

    cmp         rdx,            rcx
    je          temporal_filter_apply_epilog
    pxor        xmm7,           xmm7 ; zero for extraction (xmm7 was used as data above)
    cmp         dword ptr [rsp + block_size], 16
    je          temporal_filter_apply_load_16
    jmp         temporal_filter_apply_load_8

temporal_filter_apply_epilog:
    ; begin epilog
    mov         rbp,            [rsp + rbp_backup]
    add         rsp,            stack_size
    pop         rsp
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
; Read-only word constants used by vp8_temporal_filter_apply_sse2.
; Each is 16-byte aligned and replicated across all eight word lanes.
SECTION_RODATA
align 16
_const_3w:
    times 8 dw 3        ; multiplier for the squared difference
align 16
_const_top_bit:
    times 8 dw 1<<15    ; 0x8000, shifted right to form the rounding bit
align 16
_const_16w:
    times 8 dw 16       ; modifier is subtracted from this with saturation
|
27
vp8/encoder/x86/temporal_filter_x86.h
Normal file
27
vp8/encoder/x86/temporal_filter_x86.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/* x86-specific declarations for the temporal filter "apply" routine. */
#ifndef __INC_VP8_TEMPORAL_FILTER_X86_H
|
||||
#define __INC_VP8_TEMPORAL_FILTER_X86_H
|
||||
|
||||
#if HAVE_SSE2
|
||||
/* SSE2 implementation; prototype_apply() is defined elsewhere in the project. */
extern prototype_apply(vp8_temporal_filter_apply_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
/* Without runtime CPU detection, bind the generic name to the SSE2
 * version at compile time, replacing any earlier definition. */
#undef vp8_temporal_filter_apply
|
||||
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __INC_VP8_TEMPORAL_FILTER_X86_H
|
@@ -297,4 +297,31 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
|
||||
extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_variance_sad16x16x8
|
||||
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
|
||||
|
||||
#undef vp8_variance_sad16x8x8
|
||||
#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
|
||||
|
||||
#undef vp8_variance_sad8x16x8
|
||||
#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
|
||||
|
||||
#undef vp8_variance_sad8x8x8
|
||||
#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
|
||||
|
||||
#undef vp8_variance_sad4x4x8
|
||||
#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -18,11 +18,10 @@
|
||||
#if HAVE_MMX
|
||||
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
|
||||
{
|
||||
vp8_short_fdct4x4_c(input, output, pitch);
|
||||
vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
|
||||
vp8_short_fdct4x4_mmx(input, output, pitch);
|
||||
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
|
||||
}
|
||||
|
||||
|
||||
int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *scan_mask, short *round_ptr,
|
||||
@@ -33,7 +32,7 @@ void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *zbin_ptr = b->zbin;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
@@ -82,22 +81,16 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2
|
||||
void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
|
||||
{
|
||||
vp8_short_fdct4x4_sse2(input, output, pitch);
|
||||
vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
|
||||
}
|
||||
|
||||
int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *scan_mask, short *round_ptr,
|
||||
const short *inv_scan_order, short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
|
||||
short *coeff_ptr = b->coeff;
|
||||
short *round_ptr = b->round;
|
||||
short *quant_ptr = b->quant;
|
||||
short *quant_ptr = b->quant_fast;
|
||||
short *qcoeff_ptr = d->qcoeff;
|
||||
short *dqcoeff_ptr = d->dqcoeff;
|
||||
short *dequant_ptr = d->dequant;
|
||||
@@ -106,8 +99,7 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
|
||||
coeff_ptr,
|
||||
qcoeff_ptr,
|
||||
dequant_ptr,
|
||||
scan_mask,
|
||||
|
||||
vp8_default_inv_zig_zag,
|
||||
round_ptr,
|
||||
quant_ptr,
|
||||
dqcoeff_ptr
|
||||
@@ -179,6 +171,25 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
|
||||
short *qcoeff_ptr, short *dequant_ptr,
|
||||
short *round_ptr,
|
||||
short *quant_ptr, short *dqcoeff_ptr);
|
||||
/* Fast-quantize one block with the SSSE3 implementation.
 *
 * Forwards the source block's coeff, round and quant_fast tables and the
 * destination block's qcoeff, dequant and dqcoeff buffers to
 * vp8_fast_quantize_b_impl_ssse3(), then stores that routine's return
 * value in d->eob.
 */
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
|
||||
{
|
||||
d->eob = vp8_fast_quantize_b_impl_ssse3(
|
||||
b->coeff,
|
||||
d->qcoeff,
|
||||
d->dequant,
|
||||
b->round,
|
||||
b->quant_fast,
|
||||
d->dqcoeff
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
@@ -188,6 +199,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
int wmt_enabled = flags & HAS_SSE2;
|
||||
int SSE3Enabled = flags & HAS_SSE3;
|
||||
int SSSE3Enabled = flags & HAS_SSSE3;
|
||||
int SSE4_1Enabled = flags & HAS_SSE4_1;
|
||||
|
||||
/* Note:
|
||||
*
|
||||
@@ -198,7 +210,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
|
||||
/* Override default functions with fastest ones for this CPU. */
|
||||
#if HAVE_MMX
|
||||
|
||||
if (mmx_enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx;
|
||||
@@ -230,18 +241,11 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx;
|
||||
cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx;
|
||||
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx;
|
||||
#if 0 // new fdct
|
||||
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
|
||||
#else
|
||||
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
|
||||
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
|
||||
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
|
||||
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
|
||||
|
||||
#endif
|
||||
|
||||
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
|
||||
|
||||
@@ -254,10 +258,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
|
||||
/*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/
|
||||
}
|
||||
|
||||
#endif
|
||||
#if HAVE_SSE2
|
||||
|
||||
#if HAVE_SSE2
|
||||
if (wmt_enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt;
|
||||
@@ -306,11 +309,12 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
|
||||
/*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2;
|
||||
|
||||
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
|
||||
}
|
||||
|
||||
#endif
|
||||
#if HAVE_SSE3
|
||||
|
||||
#if HAVE_SSE3
|
||||
if (SSE3Enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3;
|
||||
@@ -319,8 +323,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
|
||||
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
|
||||
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
cpi->rtcd.search.full_search = vp8_full_search_sadx3;
|
||||
|
||||
#endif
|
||||
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
|
||||
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
|
||||
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
|
||||
@@ -328,16 +333,32 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3;
|
||||
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4;
|
||||
}
|
||||
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
|
||||
#if HAVE_SSSE3
|
||||
if (SSSE3Enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
|
||||
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
|
||||
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE4_1
|
||||
if (SSE4_1Enabled)
|
||||
{
|
||||
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4;
|
||||
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4;
|
||||
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
|
||||
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
|
||||
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@@ -17,7 +17,6 @@ VP8_COMMON_SRCS-yes += common/type_aliases.h
|
||||
VP8_COMMON_SRCS-yes += common/pragmas.h
|
||||
|
||||
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
|
||||
VP8_COMMON_SRCS-yes += common/preproc.h
|
||||
VP8_COMMON_SRCS-yes += common/vpxerrors.h
|
||||
|
||||
CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
|
||||
|
@@ -37,6 +37,8 @@ struct vp8_extracfg
|
||||
unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */
|
||||
unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
|
||||
unsigned int arnr_type; /* alt_ref filter type */
|
||||
vp8e_tuning tuning;
|
||||
unsigned int cq_level; /* constrained quality level */
|
||||
|
||||
};
|
||||
|
||||
@@ -67,6 +69,8 @@ static const struct extraconfig_map extracfg_map[] =
|
||||
0, /* arnr_max_frames */
|
||||
3, /* arnr_strength */
|
||||
3, /* arnr_type*/
|
||||
0, /* tuning*/
|
||||
10, /* cq_level */
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -104,6 +108,7 @@ update_error_state(vpx_codec_alg_priv_t *ctx,
|
||||
}
|
||||
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(str) do {\
|
||||
ctx->base.err_detail = str;\
|
||||
return VPX_CODEC_INVALID_PARAM;\
|
||||
@@ -132,8 +137,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
const vpx_codec_enc_cfg_t *cfg,
|
||||
const struct vp8_extracfg *vp8_cfg)
|
||||
{
|
||||
RANGE_CHECK(cfg, g_w, 2, 16384);
|
||||
RANGE_CHECK(cfg, g_h, 2, 16384);
|
||||
RANGE_CHECK(cfg, g_w, 1, 16384);
|
||||
RANGE_CHECK(cfg, g_h, 1, 16384);
|
||||
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
|
||||
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
|
||||
RANGE_CHECK_HI(cfg, g_profile, 3);
|
||||
@@ -145,7 +150,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
#else
|
||||
RANGE_CHECK_HI(cfg, g_lag_in_frames, 0);
|
||||
#endif
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CBR);
|
||||
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ);
|
||||
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
|
||||
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
|
||||
RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
|
||||
@@ -187,7 +192,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
|
||||
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
|
||||
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
|
||||
RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
if (cfg->g_pass == VPX_RC_LAST_PASS)
|
||||
{
|
||||
int mb_r = (cfg->g_h + 15) / 16;
|
||||
@@ -211,6 +218,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
|
||||
if ((int)(stats->count + 0.5) != n_packets - 1)
|
||||
ERROR("rc_twopass_stats_in missing EOS stats packet");
|
||||
}
|
||||
#endif
|
||||
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
@@ -295,11 +303,16 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
|
||||
{
|
||||
oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
|
||||
}
|
||||
else if (cfg.rc_end_usage == VPX_CQ)
|
||||
{
|
||||
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
|
||||
}
|
||||
|
||||
oxcf->target_bandwidth = cfg.rc_target_bitrate;
|
||||
|
||||
oxcf->best_allowed_q = cfg.rc_min_quantizer;
|
||||
oxcf->worst_allowed_q = cfg.rc_max_quantizer;
|
||||
oxcf->cq_level = vp8_cfg.cq_level;
|
||||
oxcf->fixed_q = -1;
|
||||
|
||||
oxcf->under_shoot_pct = cfg.rc_undershoot_pct;
|
||||
@@ -335,6 +348,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
|
||||
oxcf->arnr_strength = vp8_cfg.arnr_strength;
|
||||
oxcf->arnr_type = vp8_cfg.arnr_type;
|
||||
|
||||
oxcf->tuning = vp8_cfg.tuning;
|
||||
|
||||
/*
|
||||
printf("Current VP8 Settings: \n");
|
||||
@@ -448,6 +462,8 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx,
|
||||
MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames);
|
||||
MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength);
|
||||
MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type);
|
||||
MAP(VP8E_SET_TUNING, xcfg.tuning);
|
||||
MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level);
|
||||
|
||||
}
|
||||
|
||||
@@ -860,8 +876,16 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
|
||||
{
|
||||
|
||||
YV12_BUFFER_CONFIG sd;
|
||||
vp8_ppflags_t flags = {0};
|
||||
|
||||
if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, ctx->preview_ppcfg.deblocking_level, ctx->preview_ppcfg.noise_level, ctx->preview_ppcfg.post_proc_flag))
|
||||
if (ctx->preview_ppcfg.post_proc_flag)
|
||||
{
|
||||
flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
|
||||
flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
|
||||
flags.noise_level = ctx->preview_ppcfg.noise_level;
|
||||
}
|
||||
|
||||
if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags))
|
||||
{
|
||||
|
||||
/*
|
||||
@@ -1020,6 +1044,8 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] =
|
||||
{VP8E_SET_ARNR_MAXFRAMES, set_param},
|
||||
{VP8E_SET_ARNR_STRENGTH , set_param},
|
||||
{VP8E_SET_ARNR_TYPE , set_param},
|
||||
{VP8E_SET_TUNING, set_param},
|
||||
{VP8E_SET_CQ_LEVEL, set_param},
|
||||
{ -1, NULL},
|
||||
};
|
||||
|
||||
@@ -1055,7 +1081,6 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
|
||||
|
||||
4, /* rc_min_quantizer */
|
||||
63, /* rc_max_quantizer */
|
||||
|
||||
95, /* rc_undershoot_pct */
|
||||
200, /* rc_overshoot_pct */
|
||||
|
||||
|
@@ -65,12 +65,19 @@ struct vpx_codec_alg_priv
|
||||
vpx_codec_priv_t base;
|
||||
vpx_codec_mmap_t mmaps[NELEMENTS(vp8_mem_req_segs)-1];
|
||||
vpx_codec_dec_cfg_t cfg;
|
||||
vp8_stream_info_t si;
|
||||
vp8_stream_info_t si;
|
||||
int defer_alloc;
|
||||
int decoder_init;
|
||||
VP8D_PTR pbi;
|
||||
int postproc_cfg_set;
|
||||
vp8_postproc_cfg_t postproc_cfg;
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
unsigned int dbg_postproc_flag;
|
||||
int dbg_color_ref_frame_flag;
|
||||
int dbg_color_mb_modes_flag;
|
||||
int dbg_color_b_modes_flag;
|
||||
int dbg_display_mv_flag;
|
||||
#endif
|
||||
vpx_image_t img;
|
||||
int img_setup;
|
||||
int img_avail;
|
||||
@@ -253,8 +260,11 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data,
|
||||
unsigned int data_sz,
|
||||
vpx_codec_stream_info_t *si)
|
||||
{
|
||||
|
||||
vpx_codec_err_t res = VPX_CODEC_OK;
|
||||
|
||||
if(data + data_sz <= data)
|
||||
res = VPX_CODEC_INVALID_PARAM;
|
||||
else
|
||||
{
|
||||
/* Parse uncompressed part of key frame header.
|
||||
* 3 bytes:- including version, frame type and an offset
|
||||
@@ -331,7 +341,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
|
||||
ctx->img_avail = 0;
|
||||
|
||||
/* Determine the stream parameters */
|
||||
/* Determine the stream parameters. Note that we rely on peek_si to
|
||||
* validate that we have a buffer that does not wrap around the top
|
||||
* of the heap.
|
||||
*/
|
||||
if (!ctx->si.h)
|
||||
res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si);
|
||||
|
||||
@@ -410,15 +423,27 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
{
|
||||
YV12_BUFFER_CONFIG sd;
|
||||
INT64 time_stamp = 0, time_end_stamp = 0;
|
||||
int ppflag = 0;
|
||||
int ppdeblocking = 0;
|
||||
int ppnoise = 0;
|
||||
vp8_ppflags_t flags = {0};
|
||||
|
||||
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
|
||||
{
|
||||
ppflag = ctx->postproc_cfg.post_proc_flag;
|
||||
ppdeblocking = ctx->postproc_cfg.deblocking_level;
|
||||
ppnoise = ctx->postproc_cfg.noise_level;
|
||||
flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
|
||||
| ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0)
|
||||
| ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
|
||||
| ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
|
||||
| ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0)
|
||||
#endif
|
||||
;
|
||||
flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
|
||||
flags.noise_level = ctx->postproc_cfg.noise_level;
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag;
|
||||
flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
|
||||
flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag;
|
||||
flags.display_mv_flag = ctx->dbg_display_mv_flag;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline))
|
||||
@@ -427,7 +452,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
res = update_error_state(ctx, &pbi->common.error);
|
||||
}
|
||||
|
||||
if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, ppdeblocking, ppnoise, ppflag))
|
||||
if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags))
|
||||
{
|
||||
/* Align width/height */
|
||||
unsigned int a_w = (sd.y_width + 15) & ~15;
|
||||
@@ -441,6 +466,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
|
||||
vpx_img_set_rect(&ctx->img,
|
||||
VP8BORDERINPIXELS, VP8BORDERINPIXELS,
|
||||
sd.y_width, sd.y_height);
|
||||
ctx->img.user_priv = user_priv;
|
||||
ctx->img_avail = 1;
|
||||
|
||||
}
|
||||
@@ -640,12 +666,79 @@ static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx,
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Control handler for the VP8_SET_DBG_* decoder controls.
 *
 * When built with both CONFIG_POSTPROC_VISUALIZER and CONFIG_POSTPROC, reads
 * one int from args and stores it in the ctx debug field selected by
 * ctrl_id, then returns VPX_CODEC_OK. A ctrl_id that matches none of the
 * four cases is silently ignored (the switch has no default) and
 * VPX_CODEC_OK is still returned.
 *
 * In any other build configuration nothing is read or stored and
 * VPX_CODEC_INCAPABLE is returned.
 */
static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx,
|
||||
int ctrl_id,
|
||||
va_list args)
|
||||
{
|
||||
#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
|
||||
int data = va_arg(args, int);
|
||||
|
||||
/* MAP expands to one switch case that assigns data to var.
 * NOTE(review): the macro is not #undef'd after use. */
#define MAP(id, var) case id: var = data; break;
|
||||
|
||||
switch (ctrl_id)
|
||||
{
|
||||
MAP (VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag);
|
||||
MAP (VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag);
|
||||
MAP (VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag);
|
||||
MAP (VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag);
|
||||
}
|
||||
|
||||
return VPX_CODEC_OK;
|
||||
#else
|
||||
return VPX_CODEC_INCAPABLE;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Control handler that reports which reference frames are flagged for refresh.
 *
 * Reads an int pointer from args. If it is non-NULL, writes to it the sum of
 * each refresh_* flag in pbi->common multiplied by its VP8_*_FRAME constant
 * and returns VPX_CODEC_OK; otherwise returns VPX_CODEC_INVALID_PARAM.
 * ctrl_id is not used.
 */
static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
|
||||
int ctrl_id,
|
||||
va_list args)
|
||||
{
|
||||
int *update_info = va_arg(args, int *);
|
||||
/* NOTE(review): ctx->pbi is used without a NULL check -- confirm callers
 * guarantee the decoder instance exists. */
VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
|
||||
|
||||
if (update_info)
|
||||
{
|
||||
/* Presumably yields a bitmask: assumes each refresh flag is 0 or 1 and
 * the VP8_*_FRAME constants are distinct bits -- TODO confirm. */
*update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME
|
||||
+ pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME
|
||||
+ pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME;
|
||||
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
else
|
||||
return VPX_CODEC_INVALID_PARAM;
|
||||
}
|
||||
|
||||
|
||||
/* Control handler that reports the corrupted flag of the frame to show.
 *
 * Reads an int pointer from args. If it is non-NULL, copies
 * pbi->common.frame_to_show->corrupted into it and returns VPX_CODEC_OK;
 * otherwise returns VPX_CODEC_INVALID_PARAM. ctrl_id is not used.
 */
static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
|
||||
int ctrl_id,
|
||||
va_list args)
|
||||
{
|
||||
|
||||
int *corrupted = va_arg(args, int *);
|
||||
|
||||
if (corrupted)
|
||||
{
|
||||
/* NOTE(review): neither ctx->pbi nor frame_to_show is NULL-checked
 * before the dereference below -- confirm both are always set here. */
VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi;
|
||||
*corrupted = pbi->common.frame_to_show->corrupted;
|
||||
|
||||
return VPX_CODEC_OK;
|
||||
}
|
||||
else
|
||||
return VPX_CODEC_INVALID_PARAM;
|
||||
|
||||
}
|
||||
|
||||
vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
|
||||
{
|
||||
{VP8_SET_REFERENCE, vp8_set_reference},
|
||||
{VP8_COPY_REFERENCE, vp8_get_reference},
|
||||
{VP8_SET_POSTPROC, vp8_set_postproc},
|
||||
{VP8_SET_REFERENCE, vp8_set_reference},
|
||||
{VP8_COPY_REFERENCE, vp8_get_reference},
|
||||
{VP8_SET_POSTPROC, vp8_set_postproc},
|
||||
{VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options},
|
||||
{VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options},
|
||||
{VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options},
|
||||
{VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options},
|
||||
{VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates},
|
||||
{VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted},
|
||||
{ -1, NULL},
|
||||
};
|
||||
|
||||
|
@@ -94,6 +94,7 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
|
||||
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
|
||||
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm
|
||||
@@ -107,8 +108,11 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
|
||||
|
||||
|
@@ -118,7 +118,9 @@ vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx,
|
||||
{
|
||||
vpx_codec_err_t res;
|
||||
|
||||
if (!ctx || !data || !data_sz)
|
||||
/* Sanity checks */
|
||||
/* NULL data ptr allowed if data_sz is 0 too */
|
||||
if (!ctx || (!data && data_sz))
|
||||
res = VPX_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv)
|
||||
res = VPX_CODEC_ERROR;
|
||||
|
34
vpx/vp8.h
34
vpx/vp8.h
@@ -38,9 +38,13 @@
|
||||
*/
|
||||
enum vp8_dec_control_id
|
||||
{
|
||||
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
|
||||
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
|
||||
VP8_SET_POSTPROC = 3, /**< set decoder's the post processing settings */
|
||||
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
|
||||
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
|
||||
VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
|
||||
VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
|
||||
VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
|
||||
VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
|
||||
VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
|
||||
VP8_COMMON_CTRL_ID_MAX
|
||||
};
|
||||
|
||||
@@ -50,10 +54,14 @@ enum vp8_dec_control_id
|
||||
*/
|
||||
enum vp8_postproc_level
|
||||
{
|
||||
VP8_NOFILTERING = 0,
|
||||
VP8_DEBLOCK = 1,
|
||||
VP8_DEMACROBLOCK = 2,
|
||||
VP8_ADDNOISE = 4
|
||||
VP8_NOFILTERING = 0,
|
||||
VP8_DEBLOCK = 1<<0,
|
||||
VP8_DEMACROBLOCK = 1<<1,
|
||||
VP8_ADDNOISE = 1<<2,
|
||||
VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */
|
||||
VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
|
||||
VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
|
||||
VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
@@ -65,9 +73,9 @@ enum vp8_postproc_level
|
||||
|
||||
typedef struct vp8_postproc_cfg
|
||||
{
|
||||
int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
|
||||
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
|
||||
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
|
||||
int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
|
||||
int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
|
||||
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
|
||||
} vp8_postproc_cfg_t;
|
||||
|
||||
/*!\brief reference frame type
|
||||
@@ -95,12 +103,16 @@ typedef struct vpx_ref_frame
|
||||
|
||||
/*!\brief vp8 decoder control function parameter type
|
||||
*
|
||||
* defines the data type for each of VP8 decoder control funciton requires
|
||||
* defines the data type that each VP8 decoder control function requires
|
||||
*/
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *)
|
||||
VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int)
|
||||
VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int)
|
||||
|
||||
|
||||
/*! @} - end defgroup vp8 */
|
||||
|
19
vpx/vp8cx.h
19
vpx/vp8cx.h
@@ -140,7 +140,9 @@ enum vp8e_enc_control_id
|
||||
VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/
|
||||
VP8E_SET_ARNR_STRENGTH , /**< control function to set the filter strength for the arf */
|
||||
VP8E_SET_ARNR_TYPE , /**< control function to set the type of filter to use for the arf*/
|
||||
} ;
|
||||
VP8E_SET_TUNING, /**< control function to set visual tuning */
|
||||
VP8E_SET_CQ_LEVEL, /**< control function to set constrained quality level */
|
||||
};
|
||||
|
||||
/*!\brief vpx 1-D scaling mode
|
||||
*
|
||||
@@ -224,6 +226,18 @@ typedef enum
|
||||
} vp8e_token_partitions;
|
||||
|
||||
|
||||
/*!\brief VP8 model tuning parameters
|
||||
*
|
||||
* Changes the encoder to tune for certain types of input material.
|
||||
*
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
VP8_TUNE_PSNR,
|
||||
VP8_TUNE_SSIM
|
||||
} vp8e_tuning;
|
||||
|
||||
|
||||
/*!\brief VP8 encoder control function parameter type
|
||||
*
|
||||
* Defines the data types that VP8E control functions take. Note that
|
||||
@@ -253,7 +267,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, vp8e_token_partitions)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int)
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning)
|
||||
VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int)
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
|
||||
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
|
||||
|
26
vpx/vp8dx.h
26
vpx/vp8dx.h
@@ -36,6 +36,32 @@ extern vpx_codec_iface_t* vpx_codec_vp8_dx(void);
|
||||
#include "vp8.h"
|
||||
|
||||
|
||||
/*!\brief VP8 decoder control functions
|
||||
*
|
||||
* The set of macros define the control functions of VP8 decoder interface
|
||||
*/
|
||||
enum vp8d_dec_control_id
|
||||
{
|
||||
VP8_DECODER_CTRL_ID_START = 256,
|
||||
VP8D_GET_LAST_REF_UPDATES, /**< control function to get info on which reference frames were updated
|
||||
by the last decode */
|
||||
VP8D_GET_FRAME_CORRUPTED, /**< check if the indicated frame is corrupted */
|
||||
VP8_DECODER_CTRL_ID_MAX
|
||||
} ;
|
||||
|
||||
|
||||
/*!\brief VP8 decoder control function parameter type
|
||||
*
|
||||
* Defines the data types that VP8D control functions take. Note that
|
||||
* additional common controls are defined in vp8.h
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *)
|
||||
VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *)
|
||||
|
||||
|
||||
/*! @} - end defgroup vp8_decoder */
|
||||
|
||||
|
||||
|
@@ -179,7 +179,8 @@ extern "C" {
|
||||
enum vpx_rc_mode
|
||||
{
|
||||
VPX_VBR, /**< Variable Bit Rate (VBR) mode */
|
||||
VPX_CBR /**< Constant Bit Rate (CBR) mode */
|
||||
VPX_CBR, /**< Constant Bit Rate (CBR) mode */
|
||||
VPX_CQ /**< Constrained Quality (CQ) mode */
|
||||
};
|
||||
|
||||
|
||||
|
@@ -74,6 +74,7 @@ void __cpuid(int CPUInfo[4], int info_type);
|
||||
#define HAS_SSE2 0x04
|
||||
#define HAS_SSE3 0x08
|
||||
#define HAS_SSSE3 0x10
|
||||
#define HAS_SSE4_1 0x20
|
||||
#ifndef BIT
|
||||
#define BIT(n) (1<<n)
|
||||
#endif
|
||||
@@ -117,6 +118,8 @@ x86_simd_caps(void)
|
||||
|
||||
if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
|
||||
|
||||
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
|
||||
|
||||
return flags & mask;
|
||||
}
|
||||
|
||||
|
@@ -81,6 +81,8 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
|
||||
|
||||
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
||||
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * ybf->uv_stride) + border / 2;
|
||||
|
||||
ybf->corrupted = 0; /* assume not corrupted by errors */
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -57,6 +57,8 @@ extern "C"
|
||||
int border;
|
||||
int frame_size;
|
||||
YUV_TYPE clrtype;
|
||||
|
||||
int corrupted;
|
||||
} YV12_BUFFER_CONFIG;
|
||||
|
||||
int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border);
|
||||
|
94
vpxdec.c
94
vpxdec.c
@@ -35,6 +35,7 @@
|
||||
#if CONFIG_MD5
|
||||
#include "md5_utils.h"
|
||||
#endif
|
||||
#include "tools_common.h"
|
||||
#include "nestegg/include/nestegg/nestegg.h"
|
||||
|
||||
#ifndef PATH_MAX
|
||||
@@ -107,11 +108,19 @@ static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level",
|
||||
"Enable VP8 demacroblocking, w/ level");
|
||||
static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
|
||||
"Enable VP8 visible debug info");
|
||||
|
||||
static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
|
||||
"Display only selected reference frame per macro block");
|
||||
static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
|
||||
"Display only selected macro block modes");
|
||||
static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
|
||||
"Display only selected block modes");
|
||||
static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
|
||||
"Draw only selected motion vectors");
|
||||
|
||||
static const arg_def_t *vp8_pp_args[] =
|
||||
{
|
||||
&addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
|
||||
&pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs,
|
||||
NULL
|
||||
};
|
||||
#endif
|
||||
@@ -314,7 +323,8 @@ void *out_open(const char *out_fn, int do_md5)
|
||||
}
|
||||
else
|
||||
{
|
||||
FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
|
||||
FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
|
||||
: set_binary_mode(stdout);
|
||||
|
||||
if (!outfile)
|
||||
{
|
||||
@@ -432,6 +442,8 @@ unsigned int file_is_raw(FILE *infile,
|
||||
int is_raw = 0;
|
||||
vpx_codec_stream_info_t si;
|
||||
|
||||
si.sz = sizeof(si);
|
||||
|
||||
if (fread(buf, 1, 32, infile) == 32)
|
||||
{
|
||||
int i;
|
||||
@@ -540,6 +552,7 @@ webm_guess_framerate(struct input_ctx *input,
|
||||
*fps_den = tstamp / 1000;
|
||||
return 0;
|
||||
fail:
|
||||
nestegg_destroy(input->nestegg_ctx);
|
||||
input->nestegg_ctx = NULL;
|
||||
rewind(input->infile);
|
||||
return 1;
|
||||
@@ -702,6 +715,10 @@ int main(int argc, const char **argv_)
|
||||
vpx_codec_dec_cfg_t cfg = {0};
|
||||
#if CONFIG_VP8_DECODER
|
||||
vp8_postproc_cfg_t vp8_pp_cfg = {0};
|
||||
int vp8_dbg_color_ref_frame = 0;
|
||||
int vp8_dbg_color_mb_modes = 0;
|
||||
int vp8_dbg_color_b_modes = 0;
|
||||
int vp8_dbg_display_mv = 0;
|
||||
#endif
|
||||
struct input_ctx input = {0};
|
||||
|
||||
@@ -787,6 +804,42 @@ int main(int argc, const char **argv_)
|
||||
if (level)
|
||||
vp8_pp_cfg.post_proc_flag |= level;
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_ref_frame, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_color_ref_frame = flags;
|
||||
}
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_mb_modes, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_color_mb_modes = flags;
|
||||
}
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_b_modes, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_color_b_modes = flags;
|
||||
}
|
||||
}
|
||||
else if (arg_match(&arg, &pp_disp_mvs, argi))
|
||||
{
|
||||
unsigned int flags = arg_parse_int(&arg);
|
||||
if (flags)
|
||||
{
|
||||
postproc = 1;
|
||||
vp8_dbg_display_mv = flags;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
else
|
||||
@@ -805,7 +858,7 @@ int main(int argc, const char **argv_)
|
||||
usage_exit();
|
||||
|
||||
/* Open file */
|
||||
infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
|
||||
infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
|
||||
|
||||
if (!infile)
|
||||
{
|
||||
@@ -876,7 +929,13 @@ int main(int argc, const char **argv_)
|
||||
}
|
||||
|
||||
if(input.kind == WEBM_FILE)
|
||||
webm_guess_framerate(&input, &fps_den, &fps_num);
|
||||
if(webm_guess_framerate(&input, &fps_den, &fps_num))
|
||||
{
|
||||
fprintf(stderr, "Failed to guess framerate -- error parsing "
|
||||
"webm file?\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
/*Note: We can't output an aspect ratio here because IVF doesn't
|
||||
store one, and neither does VP8.
|
||||
@@ -920,6 +979,33 @@ int main(int argc, const char **argv_)
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_color_ref_frame
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_color_mb_modes
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_color_b_modes
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_dbg_display_mv
|
||||
&& vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv))
|
||||
{
|
||||
fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Decode file */
|
||||
|
108
vpxenc.c
Normal file → Executable file
108
vpxenc.c
Normal file → Executable file
@@ -35,6 +35,7 @@
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_ports/mem_ops.h"
|
||||
#include "vpx_ports/vpx_timer.h"
|
||||
#include "tools_common.h"
|
||||
#include "y4minput.h"
|
||||
#include "libmkv/EbmlWriter.h"
|
||||
#include "libmkv/EbmlIDs.h"
|
||||
@@ -185,11 +186,11 @@ int stats_open_mem(stats_io_t *stats, int pass)
|
||||
}
|
||||
|
||||
|
||||
void stats_close(stats_io_t *stats)
|
||||
void stats_close(stats_io_t *stats, int last_pass)
|
||||
{
|
||||
if (stats->file)
|
||||
{
|
||||
if (stats->pass == 1)
|
||||
if (stats->pass == last_pass)
|
||||
{
|
||||
#if 0
|
||||
#elif USE_POSIX_MMAP
|
||||
@@ -204,7 +205,7 @@ void stats_close(stats_io_t *stats)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (stats->pass == 1)
|
||||
if (stats->pass == last_pass)
|
||||
free(stats->buf.buf);
|
||||
}
|
||||
}
|
||||
@@ -250,7 +251,8 @@ enum video_file_type
|
||||
|
||||
struct detect_buffer {
|
||||
char buf[4];
|
||||
int valid;
|
||||
size_t buf_read;
|
||||
size_t position;
|
||||
};
|
||||
|
||||
|
||||
@@ -304,14 +306,21 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
|
||||
|
||||
for (r = 0; r < h; r++)
|
||||
{
|
||||
if (detect->valid)
|
||||
size_t needed = w;
|
||||
size_t buf_position = 0;
|
||||
const size_t left = detect->buf_read - detect->position;
|
||||
if (left > 0)
|
||||
{
|
||||
memcpy(ptr, detect->buf, 4);
|
||||
shortread |= fread(ptr+4, 1, w-4, f) < w-4;
|
||||
detect->valid = 0;
|
||||
const size_t more = (left < needed) ? left : needed;
|
||||
memcpy(ptr, detect->buf + detect->position, more);
|
||||
buf_position = more;
|
||||
needed -= more;
|
||||
detect->position += more;
|
||||
}
|
||||
if (needed > 0)
|
||||
{
|
||||
shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
|
||||
}
|
||||
else
|
||||
shortread |= fread(ptr, 1, w, f) < w;
|
||||
|
||||
ptr += img->stride[plane];
|
||||
}
|
||||
@@ -338,12 +347,12 @@ unsigned int file_is_ivf(FILE *infile,
|
||||
unsigned int *fourcc,
|
||||
unsigned int *width,
|
||||
unsigned int *height,
|
||||
char detect[4])
|
||||
struct detect_buffer *detect)
|
||||
{
|
||||
char raw_hdr[IVF_FILE_HDR_SZ];
|
||||
int is_ivf = 0;
|
||||
|
||||
if(memcmp(detect, "DKIF", 4) != 0)
|
||||
if(memcmp(detect->buf, "DKIF", 4) != 0)
|
||||
return 0;
|
||||
|
||||
/* See write_ivf_file_header() for more documentation on the file header
|
||||
@@ -367,6 +376,7 @@ unsigned int file_is_ivf(FILE *infile,
|
||||
{
|
||||
*width = mem_get_le16(raw_hdr + 12);
|
||||
*height = mem_get_le16(raw_hdr + 14);
|
||||
detect->position = 4;
|
||||
}
|
||||
|
||||
return is_ivf;
|
||||
@@ -434,7 +444,7 @@ struct EbmlGlobal
|
||||
int debug;
|
||||
|
||||
FILE *stream;
|
||||
uint64_t last_pts_ms;
|
||||
int64_t last_pts_ms;
|
||||
vpx_rational_t framerate;
|
||||
|
||||
/* These pointers are to the start of an element */
|
||||
@@ -647,7 +657,7 @@ write_webm_block(EbmlGlobal *glob,
|
||||
unsigned char track_number;
|
||||
unsigned short block_timecode = 0;
|
||||
unsigned char flags;
|
||||
uint64_t pts_ms;
|
||||
int64_t pts_ms;
|
||||
int start_cluster = 0, is_keyframe;
|
||||
|
||||
/* Calculate the PTS of this frame in milliseconds */
|
||||
@@ -907,7 +917,7 @@ static const arg_def_t resize_up_thresh = ARG_DEF(NULL, "resize-up", 1,
|
||||
static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1,
|
||||
"Downscale threshold (buf %)");
|
||||
static const arg_def_t end_usage = ARG_DEF(NULL, "end-usage", 1,
|
||||
"VBR=0 | CBR=1");
|
||||
"VBR=0 | CBR=1 | CQ=2");
|
||||
static const arg_def_t target_bitrate = ARG_DEF(NULL, "target-bitrate", 1,
|
||||
"Bitrate (kbps)");
|
||||
static const arg_def_t min_quantizer = ARG_DEF(NULL, "min-q", 1,
|
||||
@@ -978,23 +988,34 @@ static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
|
||||
static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
|
||||
"Enable automatic alt reference frames");
|
||||
static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
|
||||
"alt_ref Max Frames");
|
||||
"AltRef Max Frames");
|
||||
static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
|
||||
"alt_ref Strength");
|
||||
"AltRef Strength");
|
||||
static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
|
||||
"alt_ref Type");
|
||||
"AltRef Type");
|
||||
static const struct arg_enum_list tuning_enum[] = {
|
||||
{"psnr", VP8_TUNE_PSNR},
|
||||
{"ssim", VP8_TUNE_SSIM},
|
||||
{NULL, 0}
|
||||
};
|
||||
static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1,
|
||||
"Material to favor", tuning_enum);
|
||||
static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1,
|
||||
"Constrained Quality Level");
|
||||
|
||||
static const arg_def_t *vp8_args[] =
|
||||
{
|
||||
&cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
|
||||
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, NULL
|
||||
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
|
||||
&tune_ssim, &cq_level, NULL
|
||||
};
|
||||
static const int vp8_arg_ctrl_map[] =
|
||||
{
|
||||
VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
|
||||
VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
|
||||
VP8E_SET_TOKEN_PARTITIONS,
|
||||
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, 0
|
||||
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE,
|
||||
VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, 0
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -1073,6 +1094,7 @@ int main(int argc, const char **argv_)
|
||||
int psnr_count = 0;
|
||||
|
||||
exec_name = argv_[0];
|
||||
ebml.last_pts_ms = -1;
|
||||
|
||||
if (argc < 3)
|
||||
usage_exit();
|
||||
@@ -1189,6 +1211,12 @@ int main(int argc, const char **argv_)
|
||||
*/
|
||||
cfg.g_timebase.den = 1000;
|
||||
|
||||
/* Never use the library's default resolution, require it be parsed
|
||||
* from the file or set on the command line.
|
||||
*/
|
||||
cfg.g_w = 0;
|
||||
cfg.g_h = 0;
|
||||
|
||||
/* Now parse the remainder of the parameters. */
|
||||
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
|
||||
{
|
||||
@@ -1300,7 +1328,7 @@ int main(int argc, const char **argv_)
|
||||
if (arg_ctrl_cnt < ARG_CTRL_CNT_MAX)
|
||||
{
|
||||
arg_ctrls[arg_ctrl_cnt][0] = ctrl_args_map[i];
|
||||
arg_ctrls[arg_ctrl_cnt][1] = arg_parse_int(&arg);
|
||||
arg_ctrls[arg_ctrl_cnt][1] = arg_parse_enum_or_int(&arg);
|
||||
arg_ctrl_cnt++;
|
||||
}
|
||||
}
|
||||
@@ -1330,11 +1358,11 @@ int main(int argc, const char **argv_)
|
||||
{
|
||||
int frames_in = 0, frames_out = 0;
|
||||
unsigned long nbytes = 0;
|
||||
size_t detect_bytes;
|
||||
struct detect_buffer detect;
|
||||
|
||||
/* Parse certain options from the input file, if possible */
|
||||
infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin;
|
||||
infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb")
|
||||
: set_binary_mode(stdin);
|
||||
|
||||
if (!infile)
|
||||
{
|
||||
@@ -1344,13 +1372,11 @@ int main(int argc, const char **argv_)
|
||||
|
||||
/* For RAW input sources, these bytes will applied on the first frame
|
||||
* in read_frame().
|
||||
* We can always read 4 bytes because the minimum supported frame size
|
||||
* is 2x2.
|
||||
*/
|
||||
detect_bytes = fread(detect.buf, 1, 4, infile);
|
||||
detect.valid = 0;
|
||||
detect.buf_read = fread(detect.buf, 1, 4, infile);
|
||||
detect.position = 0;
|
||||
|
||||
if (detect_bytes == 4 && file_is_y4m(infile, &y4m, detect.buf))
|
||||
if (detect.buf_read == 4 && file_is_y4m(infile, &y4m, detect.buf))
|
||||
{
|
||||
if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
|
||||
{
|
||||
@@ -1375,8 +1401,8 @@ int main(int argc, const char **argv_)
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
else if (detect_bytes == 4 &&
|
||||
file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
|
||||
else if (detect.buf_read == 4 &&
|
||||
file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, &detect))
|
||||
{
|
||||
file_type = FILE_TYPE_IVF;
|
||||
switch (fourcc)
|
||||
@@ -1395,8 +1421,15 @@ int main(int argc, const char **argv_)
|
||||
else
|
||||
{
|
||||
file_type = FILE_TYPE_RAW;
|
||||
detect.valid = 1;
|
||||
}
|
||||
|
||||
if(!cfg.g_w || !cfg.g_h)
|
||||
{
|
||||
fprintf(stderr, "Specify stream dimensions with --width (-w) "
|
||||
" and --height (-h).\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
#define SHOW(field) fprintf(stderr, " %-28s = %d\n", #field, cfg.field)
|
||||
|
||||
if (verbose && pass == 0)
|
||||
@@ -1449,7 +1482,8 @@ int main(int argc, const char **argv_)
|
||||
cfg.g_w, cfg.g_h, 1);
|
||||
}
|
||||
|
||||
outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
|
||||
outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb")
|
||||
: set_binary_mode(stdout);
|
||||
|
||||
if (!outfile)
|
||||
{
|
||||
@@ -1527,7 +1561,7 @@ int main(int argc, const char **argv_)
|
||||
vpx_codec_iter_t iter = NULL;
|
||||
const vpx_codec_cx_pkt_t *pkt;
|
||||
struct vpx_usec_timer timer;
|
||||
int64_t frame_start;
|
||||
int64_t frame_start, next_frame_start;
|
||||
|
||||
if (!arg_limit || frames_in < arg_limit)
|
||||
{
|
||||
@@ -1548,9 +1582,11 @@ int main(int argc, const char **argv_)
|
||||
|
||||
frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1)
|
||||
* arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num;
|
||||
next_frame_start = (cfg.g_timebase.den * (int64_t)(frames_in)
|
||||
* arg_framerate.den)
|
||||
/ cfg.g_timebase.num / arg_framerate.num;
|
||||
vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start,
|
||||
cfg.g_timebase.den * arg_framerate.den
|
||||
/ cfg.g_timebase.num / arg_framerate.num,
|
||||
next_frame_start - frame_start,
|
||||
0, arg_deadline);
|
||||
vpx_usec_timer_mark(&timer);
|
||||
cx_time += vpx_usec_timer_elapsed(&timer);
|
||||
@@ -1658,7 +1694,7 @@ int main(int argc, const char **argv_)
|
||||
}
|
||||
|
||||
fclose(outfile);
|
||||
stats_close(&stats);
|
||||
stats_close(&stats, arg_passes-1);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
if (one_pass_only)
|
||||
|
Reference in New Issue
Block a user